author | Dimitry Andric <dim@FreeBSD.org> | 2014-11-24 09:15:30 +0000
---|---|---
committer | Dimitry Andric <dim@FreeBSD.org> | 2014-11-24 09:15:30 +0000
commit | 9f4dbff6669c8037f3b036bcf580d14f1a4f12a5 (patch) |
tree | 47df2c12b57214af6c31e47404b005675b8b7ffc /lib/CodeGen |
parent | f73d5f23a889b93d89ddef61ac0995df40286bb8 (diff) |
download | src-test2-9f4dbff6669c8037f3b036bcf580d14f1a4f12a5.tar.gz, src-test2-9f4dbff6669c8037f3b036bcf580d14f1a4f12a5.zip |
Diffstat (limited to 'lib/CodeGen')
69 files changed, 18610 insertions, 11766 deletions
diff --git a/lib/CodeGen/ABIInfo.h b/lib/CodeGen/ABIInfo.h index 468fe0439967..d3ec46c4c4a1 100644 --- a/lib/CodeGen/ABIInfo.h +++ b/lib/CodeGen/ABIInfo.h @@ -11,8 +11,8 @@ #define CLANG_CODEGEN_ABIINFO_H #include "clang/AST/Type.h" -#include "llvm/IR/Type.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Type.h" namespace llvm { class Value; diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp index 90b0f68bd693..cec48f35a2e9 100644 --- a/lib/CodeGen/BackendUtil.cpp +++ b/lib/CodeGen/BackendUtil.cpp @@ -13,13 +13,15 @@ #include "clang/Basic/TargetOptions.h" #include "clang/Frontend/CodeGenOptions.h" #include "clang/Frontend/FrontendDiagnostic.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/Assembly/PrintModulePass.h" -#include "llvm/Bitcode/ReaderWriter.h" +#include "clang/Frontend/Utils.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Bitcode/BitcodeWriterPass.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Verifier.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" @@ -36,6 +38,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar.h" +#include <memory> using namespace clang; using namespace llvm; @@ -55,38 +58,37 @@ class EmitAssemblyHelper { mutable FunctionPassManager *PerFunctionPasses; private: - PassManager *getCodeGenPasses(TargetMachine *TM) const { + PassManager *getCodeGenPasses() const { if (!CodeGenPasses) { CodeGenPasses = new PassManager(); - CodeGenPasses->add(new DataLayout(TheModule)); + CodeGenPasses->add(new DataLayoutPass(TheModule)); if (TM) TM->addAnalysisPasses(*CodeGenPasses); } return CodeGenPasses; } - PassManager *getPerModulePasses(TargetMachine *TM) const { + PassManager *getPerModulePasses() const { if (!PerModulePasses) { PerModulePasses = new PassManager(); - PerModulePasses->add(new DataLayout(TheModule)); + PerModulePasses->add(new DataLayoutPass(TheModule)); if (TM) TM->addAnalysisPasses(*PerModulePasses); } return PerModulePasses; } - FunctionPassManager *getPerFunctionPasses(TargetMachine *TM) const { + FunctionPassManager *getPerFunctionPasses() const { if (!PerFunctionPasses) { PerFunctionPasses = new FunctionPassManager(TheModule); - PerFunctionPasses->add(new DataLayout(TheModule)); + PerFunctionPasses->add(new DataLayoutPass(TheModule)); if (TM) TM->addAnalysisPasses(*PerFunctionPasses); } return PerFunctionPasses; } - - void CreatePasses(TargetMachine *TM); + void CreatePasses(); /// CreateTargetMachine - Generates the TargetMachine. /// Returns Null if it is unable to create the target machine. @@ -101,8 +103,7 @@ private: /// AddEmitPasses - Add passes necessary to emit assembly or LLVM IR. /// /// \return True on success. 
- bool AddEmitPasses(BackendAction Action, formatted_raw_ostream &OS, - TargetMachine *TM); + bool AddEmitPasses(BackendAction Action, formatted_raw_ostream &OS); public: EmitAssemblyHelper(DiagnosticsEngine &_Diags, @@ -112,14 +113,19 @@ public: Module *M) : Diags(_Diags), CodeGenOpts(CGOpts), TargetOpts(TOpts), LangOpts(LOpts), TheModule(M), CodeGenerationTime("Code Generation Time"), - CodeGenPasses(0), PerModulePasses(0), PerFunctionPasses(0) {} + CodeGenPasses(nullptr), PerModulePasses(nullptr), + PerFunctionPasses(nullptr) {} ~EmitAssemblyHelper() { delete CodeGenPasses; delete PerModulePasses; delete PerFunctionPasses; + if (CodeGenOpts.DisableFree) + BuryPointer(TM.release()); } + std::unique_ptr<TargetMachine> TM; + void EmitAssembly(BackendAction Action, raw_ostream *OS); }; @@ -162,6 +168,11 @@ static void addSampleProfileLoaderPass(const PassManagerBuilder &Builder, PM.add(createSampleProfileLoaderPass(CGOpts.SampleProfileFile)); } +static void addAddDiscriminatorsPass(const PassManagerBuilder &Builder, + PassManagerBase &PM) { + PM.add(createAddDiscriminatorsPass()); +} + static void addBoundsCheckingPass(const PassManagerBuilder &Builder, PassManagerBase &PM) { PM.add(createBoundsCheckingPass()); @@ -169,20 +180,8 @@ static void addBoundsCheckingPass(const PassManagerBuilder &Builder, static void addAddressSanitizerPasses(const PassManagerBuilder &Builder, PassManagerBase &PM) { - const PassManagerBuilderWrapper &BuilderWrapper = - static_cast<const PassManagerBuilderWrapper&>(Builder); - const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); - const LangOptions &LangOpts = BuilderWrapper.getLangOpts(); - PM.add(createAddressSanitizerFunctionPass( - LangOpts.Sanitize.InitOrder, - LangOpts.Sanitize.UseAfterReturn, - LangOpts.Sanitize.UseAfterScope, - CGOpts.SanitizerBlacklistFile, - CGOpts.SanitizeAddressZeroBaseShadow)); - PM.add(createAddressSanitizerModulePass( - LangOpts.Sanitize.InitOrder, - CGOpts.SanitizerBlacklistFile, - CGOpts.SanitizeAddressZeroBaseShadow)); + PM.add(createAddressSanitizerFunctionPass()); + PM.add(createAddressSanitizerModulePass()); } static void addMemorySanitizerPass(const PassManagerBuilder &Builder, @@ -190,8 +189,7 @@ static void addMemorySanitizerPass(const PassManagerBuilder &Builder, const PassManagerBuilderWrapper &BuilderWrapper = static_cast<const PassManagerBuilderWrapper&>(Builder); const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); - PM.add(createMemorySanitizerPass(CGOpts.SanitizeMemoryTrackOrigins, - CGOpts.SanitizerBlacklistFile)); + PM.add(createMemorySanitizerPass(CGOpts.SanitizeMemoryTrackOrigins)); // MemorySanitizer inserts complex instrumentation that mostly follows // the logic of the original code, but operates on "shadow" values. 
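Aside (not part of the commit): the BackendUtil.cpp hunks above hang most of their new behavior off PassManagerBuilder extension points — small static callbacks such as addAddDiscriminatorsPass, addAddressSanitizerPasses, and addThreadSanitizerPass are registered against a point like EP_EarlyAsPossible, and the builder invokes them while populating the pass managers. The sketch below is a minimal standalone analogue of that callback-registration pattern; the Builder/PassManager types and pass names are invented for illustration and are not the LLVM API.

```cpp
#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Toy stand-ins for LLVM's PassManagerBase and passes (illustrative only).
struct PassManager {
  std::vector<std::string> Passes;
  void add(const std::string &P) { Passes.push_back(P); }
};

class Builder {
public:
  enum ExtensionPoint { EP_EarlyAsPossible, EP_OptimizerLast };
  using Callback = std::function<void(const Builder &, PassManager &)>;

  // Analogue of PassManagerBuilder::addExtension: remember a callback.
  void addExtension(ExtensionPoint EP, Callback C) {
    Extensions[EP].push_back(std::move(C));
  }

  // Analogue of populating a pass manager: run callbacks around the defaults.
  void populate(PassManager &PM) const {
    runExtensions(EP_EarlyAsPossible, PM);
    PM.add("default-optimization-pipeline");
    runExtensions(EP_OptimizerLast, PM);
  }

private:
  void runExtensions(ExtensionPoint EP, PassManager &PM) const {
    auto It = Extensions.find(EP);
    if (It == Extensions.end())
      return;
    for (const Callback &C : It->second)
      C(*this, PM);
  }
  std::map<ExtensionPoint, std::vector<Callback>> Extensions;
};

// Analogue of the static helpers in the diff (e.g. addAddDiscriminatorsPass).
static void addDiscriminators(const Builder &, PassManager &PM) {
  PM.add("add-discriminators");
}

int main() {
  Builder B;
  B.addExtension(Builder::EP_EarlyAsPossible, addDiscriminators);
  PassManager PM;
  B.populate(PM);
  for (const std::string &P : PM.Passes)
    std::cout << P << "\n";
}
```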
@@ -208,10 +206,7 @@ static void addMemorySanitizerPass(const PassManagerBuilder &Builder, static void addThreadSanitizerPass(const PassManagerBuilder &Builder, PassManagerBase &PM) { - const PassManagerBuilderWrapper &BuilderWrapper = - static_cast<const PassManagerBuilderWrapper&>(Builder); - const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); - PM.add(createThreadSanitizerPass(CGOpts.SanitizerBlacklistFile)); + PM.add(createThreadSanitizerPass()); } static void addDataFlowSanitizerPass(const PassManagerBuilder &Builder, @@ -222,7 +217,7 @@ static void addDataFlowSanitizerPass(const PassManagerBuilder &Builder, PM.add(createDataFlowSanitizerPass(CGOpts.SanitizerBlacklistFile)); } -void EmitAssemblyHelper::CreatePasses(TargetMachine *TM) { +void EmitAssemblyHelper::CreatePasses() { unsigned OptLevel = CodeGenOpts.OptimizationLevel; CodeGenOptions::InliningMethod Inlining = CodeGenOpts.getInlining(); @@ -240,10 +235,14 @@ void EmitAssemblyHelper::CreatePasses(TargetMachine *TM) { PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP; PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop; + PMBuilder.DisableTailCalls = CodeGenOpts.DisableTailCalls; PMBuilder.DisableUnitAtATime = !CodeGenOpts.UnitAtATime; PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops; PMBuilder.RerollLoops = CodeGenOpts.RerollLoops; + PMBuilder.addExtension(PassManagerBuilder::EP_EarlyAsPossible, + addAddDiscriminatorsPass); + if (!CodeGenOpts.SampleProfileFile.empty()) PMBuilder.addExtension(PassManagerBuilder::EP_EarlyAsPossible, addSampleProfileLoaderPass); @@ -298,19 +297,12 @@ void EmitAssemblyHelper::CreatePasses(TargetMachine *TM) { PMBuilder.LibraryInfo = new TargetLibraryInfo(TargetTriple); if (!CodeGenOpts.SimplifyLibCalls) PMBuilder.LibraryInfo->disableAllFunctions(); - + switch (Inlining) { case CodeGenOptions::NoInlining: break; case CodeGenOptions::NormalInlining: { - // FIXME: Derive these constants in a principled fashion. - unsigned Threshold = 225; - if (CodeGenOpts.OptimizeSize == 1) // -Os - Threshold = 75; - else if (CodeGenOpts.OptimizeSize == 2) // -Oz - Threshold = 25; - else if (OptLevel > 2) - Threshold = 275; - PMBuilder.Inliner = createFunctionInliningPass(Threshold); + PMBuilder.Inliner = + createFunctionInliningPass(OptLevel, CodeGenOpts.OptimizeSize); break; } case CodeGenOptions::OnlyAlwaysInlining: @@ -324,13 +316,15 @@ void EmitAssemblyHelper::CreatePasses(TargetMachine *TM) { } // Set up the per-function pass manager. - FunctionPassManager *FPM = getPerFunctionPasses(TM); + FunctionPassManager *FPM = getPerFunctionPasses(); if (CodeGenOpts.VerifyModule) FPM->add(createVerifierPass()); PMBuilder.populateFunctionPassManager(*FPM); // Set up the per-module pass manager. - PassManager *MPM = getPerModulePasses(TM); + PassManager *MPM = getPerModulePasses(); + if (CodeGenOpts.VerifyModule) + MPM->add(createDebugInfoVerifierPass()); if (!CodeGenOpts.DisableGCov && (CodeGenOpts.EmitGcovArcs || CodeGenOpts.EmitGcovNotes)) { @@ -360,32 +354,19 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { if (!TheTarget) { if (MustCreateTM) Diags.Report(diag::err_fe_unable_to_create_target) << Error; - return 0; + return nullptr; } - // FIXME: Expose these capabilities via actual APIs!!!! Aside from just - // being gross, this is also totally broken if we ever care about - // concurrency. 
- - TargetMachine::setAsmVerbosityDefault(CodeGenOpts.AsmVerbose); - - TargetMachine::setFunctionSections(CodeGenOpts.FunctionSections); - TargetMachine::setDataSections (CodeGenOpts.DataSections); - - // FIXME: Parse this earlier. - llvm::CodeModel::Model CM; - if (CodeGenOpts.CodeModel == "small") { - CM = llvm::CodeModel::Small; - } else if (CodeGenOpts.CodeModel == "kernel") { - CM = llvm::CodeModel::Kernel; - } else if (CodeGenOpts.CodeModel == "medium") { - CM = llvm::CodeModel::Medium; - } else if (CodeGenOpts.CodeModel == "large") { - CM = llvm::CodeModel::Large; - } else { - assert(CodeGenOpts.CodeModel.empty() && "Invalid code model!"); - CM = llvm::CodeModel::Default; - } + unsigned CodeModel = + llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel) + .Case("small", llvm::CodeModel::Small) + .Case("kernel", llvm::CodeModel::Kernel) + .Case("medium", llvm::CodeModel::Medium) + .Case("large", llvm::CodeModel::Large) + .Case("default", llvm::CodeModel::Default) + .Default(~0u); + assert(CodeModel != ~0u && "invalid code model!"); + llvm::CodeModel::Model CM = static_cast<llvm::CodeModel::Model>(CodeModel); SmallVector<const char *, 16> BackendArgs; BackendArgs.push_back("clang"); // Fake program name. @@ -402,8 +383,8 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { for (unsigned i = 0, e = CodeGenOpts.BackendOptions.size(); i != e; ++i) BackendArgs.push_back(CodeGenOpts.BackendOptions[i].c_str()); if (CodeGenOpts.NoGlobalMerge) - BackendArgs.push_back("-global-merge=false"); - BackendArgs.push_back(0); + BackendArgs.push_back("-enable-global-merge=false"); + BackendArgs.push_back(nullptr); llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1, BackendArgs.data()); @@ -437,6 +418,12 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { llvm::TargetOptions Options; + if (CodeGenOpts.DisableIntegratedAS) + Options.DisableIntegratedAS = true; + + if (CodeGenOpts.CompressDebugSections) + Options.CompressDebugSections = true; + // Set frame pointer elimination mode. if (!CodeGenOpts.DisableFPElim) { Options.NoFramePointerElim = false; @@ -482,32 +469,27 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { Options.DisableTailCalls = CodeGenOpts.DisableTailCalls; Options.TrapFuncName = CodeGenOpts.TrapFuncName; Options.PositionIndependentExecutable = LangOpts.PIELevel != 0; - Options.EnableSegmentedStacks = CodeGenOpts.EnableSegmentedStacks; + Options.FunctionSections = CodeGenOpts.FunctionSections; + Options.DataSections = CodeGenOpts.DataSections; + + Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; + Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels; + Options.MCOptions.MCUseDwarfDirectory = !CodeGenOpts.NoDwarfDirectoryAsm; + Options.MCOptions.MCNoExecStack = CodeGenOpts.NoExecStack; + Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose; TargetMachine *TM = TheTarget->createTargetMachine(Triple, TargetOpts.CPU, FeaturesStr, Options, RM, CM, OptLevel); - if (CodeGenOpts.RelaxAll) - TM->setMCRelaxAll(true); - if (CodeGenOpts.SaveTempLabels) - TM->setMCSaveTempLabels(true); - if (CodeGenOpts.NoDwarf2CFIAsm) - TM->setMCUseCFI(false); - if (!CodeGenOpts.NoDwarfDirectoryAsm) - TM->setMCUseDwarfDirectory(true); - if (CodeGenOpts.NoExecStack) - TM->setMCNoExecStack(true); - return TM; } bool EmitAssemblyHelper::AddEmitPasses(BackendAction Action, - formatted_raw_ostream &OS, - TargetMachine *TM) { + formatted_raw_ostream &OS) { // Create the code generator passes. 
- PassManager *PM = getCodeGenPasses(TM); + PassManager *PM = getCodeGenPasses(); // Add LibraryInfo. llvm::Triple TargetTriple(TheModule->getTargetTriple()); @@ -546,33 +528,34 @@ bool EmitAssemblyHelper::AddEmitPasses(BackendAction Action, } void EmitAssemblyHelper::EmitAssembly(BackendAction Action, raw_ostream *OS) { - TimeRegion Region(llvm::TimePassesIsEnabled ? &CodeGenerationTime : 0); + TimeRegion Region(llvm::TimePassesIsEnabled ? &CodeGenerationTime : nullptr); llvm::formatted_raw_ostream FormattedOS; bool UsesCodeGen = (Action != Backend_EmitNothing && Action != Backend_EmitBC && Action != Backend_EmitLL); - TargetMachine *TM = CreateTargetMachine(UsesCodeGen); + if (!TM) + TM.reset(CreateTargetMachine(UsesCodeGen)); + if (UsesCodeGen && !TM) return; - llvm::OwningPtr<TargetMachine> TMOwner(CodeGenOpts.DisableFree ? 0 : TM); - CreatePasses(TM); + CreatePasses(); switch (Action) { case Backend_EmitNothing: break; case Backend_EmitBC: - getPerModulePasses(TM)->add(createBitcodeWriterPass(*OS)); + getPerModulePasses()->add(createBitcodeWriterPass(*OS)); break; case Backend_EmitLL: FormattedOS.setStream(*OS, formatted_raw_ostream::PRESERVE_STREAM); - getPerModulePasses(TM)->add(createPrintModulePass(&FormattedOS)); + getPerModulePasses()->add(createPrintModulePass(FormattedOS)); break; default: FormattedOS.setStream(*OS, formatted_raw_ostream::PRESERVE_STREAM); - if (!AddEmitPasses(Action, FormattedOS, TM)) + if (!AddEmitPasses(Action, FormattedOS)) return; } @@ -607,10 +590,23 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, raw_ostream *OS) { void clang::EmitBackendOutput(DiagnosticsEngine &Diags, const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts, - const LangOptions &LOpts, - Module *M, - BackendAction Action, raw_ostream *OS) { + const LangOptions &LOpts, StringRef TDesc, + Module *M, BackendAction Action, + raw_ostream *OS) { EmitAssemblyHelper AsmHelper(Diags, CGOpts, TOpts, LOpts, M); AsmHelper.EmitAssembly(Action, OS); + + // If an optional clang TargetInfo description string was passed in, use it to + // verify the LLVM TargetMachine's DataLayout. + if (AsmHelper.TM && !TDesc.empty()) { + std::string DLDesc = + AsmHelper.TM->getDataLayout()->getStringRepresentation(); + if (DLDesc != TDesc) { + unsigned DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Error, "backend data layout '%0' does not match " + "expected target description '%1'"); + Diags.Report(DiagID) << DLDesc << TDesc; + } + } } diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp index 0df2a4000e28..89bde2ce20d9 100644 --- a/lib/CodeGen/CGAtomic.cpp +++ b/lib/CodeGen/CGAtomic.cpp @@ -24,16 +24,6 @@ using namespace clang; using namespace CodeGen; -// The ABI values for various atomic memory orderings. 
-enum AtomicOrderingKind { - AO_ABI_memory_order_relaxed = 0, - AO_ABI_memory_order_consume = 1, - AO_ABI_memory_order_acquire = 2, - AO_ABI_memory_order_release = 3, - AO_ABI_memory_order_acq_rel = 4, - AO_ABI_memory_order_seq_cst = 5 -}; - namespace { class AtomicInfo { CodeGenFunction &CGF; @@ -57,10 +47,10 @@ namespace { ASTContext &C = CGF.getContext(); uint64_t valueAlignInBits; - llvm::tie(ValueSizeInBits, valueAlignInBits) = C.getTypeInfo(ValueTy); + std::tie(ValueSizeInBits, valueAlignInBits) = C.getTypeInfo(ValueTy); uint64_t atomicAlignInBits; - llvm::tie(AtomicSizeInBits, atomicAlignInBits) = C.getTypeInfo(AtomicTy); + std::tie(AtomicSizeInBits, atomicAlignInBits) = C.getTypeInfo(AtomicTy); assert(ValueSizeInBits <= AtomicSizeInBits); assert(valueAlignInBits <= atomicAlignInBits); @@ -184,10 +174,139 @@ bool AtomicInfo::emitMemSetZeroIfNecessary(LValue dest) const { return true; } -static void -EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, llvm::Value *Dest, - llvm::Value *Ptr, llvm::Value *Val1, llvm::Value *Val2, - uint64_t Size, unsigned Align, llvm::AtomicOrdering Order) { +static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak, + llvm::Value *Dest, llvm::Value *Ptr, + llvm::Value *Val1, llvm::Value *Val2, + uint64_t Size, unsigned Align, + llvm::AtomicOrdering SuccessOrder, + llvm::AtomicOrdering FailureOrder) { + // Note that cmpxchg doesn't support weak cmpxchg, at least at the moment. + llvm::LoadInst *Expected = CGF.Builder.CreateLoad(Val1); + Expected->setAlignment(Align); + llvm::LoadInst *Desired = CGF.Builder.CreateLoad(Val2); + Desired->setAlignment(Align); + + llvm::AtomicCmpXchgInst *Pair = CGF.Builder.CreateAtomicCmpXchg( + Ptr, Expected, Desired, SuccessOrder, FailureOrder); + Pair->setVolatile(E->isVolatile()); + Pair->setWeak(IsWeak); + + // Cmp holds the result of the compare-exchange operation: true on success, + // false on failure. + llvm::Value *Old = CGF.Builder.CreateExtractValue(Pair, 0); + llvm::Value *Cmp = CGF.Builder.CreateExtractValue(Pair, 1); + + // This basic block is used to hold the store instruction if the operation + // failed. + llvm::BasicBlock *StoreExpectedBB = + CGF.createBasicBlock("cmpxchg.store_expected", CGF.CurFn); + + // This basic block is the exit point of the operation, we should end up + // here regardless of whether or not the operation succeeded. + llvm::BasicBlock *ContinueBB = + CGF.createBasicBlock("cmpxchg.continue", CGF.CurFn); + + // Update Expected if Expected isn't equal to Old, otherwise branch to the + // exit point. + CGF.Builder.CreateCondBr(Cmp, ContinueBB, StoreExpectedBB); + + CGF.Builder.SetInsertPoint(StoreExpectedBB); + // Update the memory at Expected with Old's value. + llvm::StoreInst *StoreExpected = CGF.Builder.CreateStore(Old, Val1); + StoreExpected->setAlignment(Align); + // Finally, branch to the exit point. + CGF.Builder.CreateBr(ContinueBB); + + CGF.Builder.SetInsertPoint(ContinueBB); + // Update the memory at Dest with Cmp's value. + CGF.EmitStoreOfScalar(Cmp, CGF.MakeAddrLValue(Dest, E->getType())); + return; +} + +/// Given an ordering required on success, emit all possible cmpxchg +/// instructions to cope with the provided (but possibly only dynamically known) +/// FailureOrder. 
+static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E, + bool IsWeak, llvm::Value *Dest, + llvm::Value *Ptr, llvm::Value *Val1, + llvm::Value *Val2, + llvm::Value *FailureOrderVal, + uint64_t Size, unsigned Align, + llvm::AtomicOrdering SuccessOrder) { + llvm::AtomicOrdering FailureOrder; + if (llvm::ConstantInt *FO = dyn_cast<llvm::ConstantInt>(FailureOrderVal)) { + switch (FO->getSExtValue()) { + default: + FailureOrder = llvm::Monotonic; + break; + case AtomicExpr::AO_ABI_memory_order_consume: + case AtomicExpr::AO_ABI_memory_order_acquire: + FailureOrder = llvm::Acquire; + break; + case AtomicExpr::AO_ABI_memory_order_seq_cst: + FailureOrder = llvm::SequentiallyConsistent; + break; + } + if (FailureOrder >= SuccessOrder) { + // Don't assert on undefined behaviour. + FailureOrder = + llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrder); + } + emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, Align, + SuccessOrder, FailureOrder); + return; + } + + // Create all the relevant BB's + llvm::BasicBlock *MonotonicBB = nullptr, *AcquireBB = nullptr, + *SeqCstBB = nullptr; + MonotonicBB = CGF.createBasicBlock("monotonic_fail", CGF.CurFn); + if (SuccessOrder != llvm::Monotonic && SuccessOrder != llvm::Release) + AcquireBB = CGF.createBasicBlock("acquire_fail", CGF.CurFn); + if (SuccessOrder == llvm::SequentiallyConsistent) + SeqCstBB = CGF.createBasicBlock("seqcst_fail", CGF.CurFn); + + llvm::BasicBlock *ContBB = CGF.createBasicBlock("atomic.continue", CGF.CurFn); + + llvm::SwitchInst *SI = CGF.Builder.CreateSwitch(FailureOrderVal, MonotonicBB); + + // Emit all the different atomics + + // MonotonicBB is arbitrarily chosen as the default case; in practice, this + // doesn't matter unless someone is crazy enough to use something that + // doesn't fold to a constant for the ordering. 
+ CGF.Builder.SetInsertPoint(MonotonicBB); + emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, + Size, Align, SuccessOrder, llvm::Monotonic); + CGF.Builder.CreateBr(ContBB); + + if (AcquireBB) { + CGF.Builder.SetInsertPoint(AcquireBB); + emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, + Size, Align, SuccessOrder, llvm::Acquire); + CGF.Builder.CreateBr(ContBB); + SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_consume), + AcquireBB); + SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_acquire), + AcquireBB); + } + if (SeqCstBB) { + CGF.Builder.SetInsertPoint(SeqCstBB); + emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, + Size, Align, SuccessOrder, llvm::SequentiallyConsistent); + CGF.Builder.CreateBr(ContBB); + SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_seq_cst), + SeqCstBB); + } + + CGF.Builder.SetInsertPoint(ContBB); +} + +static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, llvm::Value *Dest, + llvm::Value *Ptr, llvm::Value *Val1, llvm::Value *Val2, + llvm::Value *IsWeak, llvm::Value *FailureOrder, + uint64_t Size, unsigned Align, + llvm::AtomicOrdering Order) { llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add; llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0; @@ -196,25 +315,43 @@ EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, llvm::Value *Dest, llvm_unreachable("Already handled!"); case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2, + FailureOrder, Size, Align, Order); + return; case AtomicExpr::AO__c11_atomic_compare_exchange_weak: + emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2, + FailureOrder, Size, Align, Order); + return; case AtomicExpr::AO__atomic_compare_exchange: case AtomicExpr::AO__atomic_compare_exchange_n: { - // Note that cmpxchg only supports specifying one ordering and - // doesn't support weak cmpxchg, at least at the moment. 
- llvm::LoadInst *LoadVal1 = CGF.Builder.CreateLoad(Val1); - LoadVal1->setAlignment(Align); - llvm::LoadInst *LoadVal2 = CGF.Builder.CreateLoad(Val2); - LoadVal2->setAlignment(Align); - llvm::AtomicCmpXchgInst *CXI = - CGF.Builder.CreateAtomicCmpXchg(Ptr, LoadVal1, LoadVal2, Order); - CXI->setVolatile(E->isVolatile()); - llvm::StoreInst *StoreVal1 = CGF.Builder.CreateStore(CXI, Val1); - StoreVal1->setAlignment(Align); - llvm::Value *Cmp = CGF.Builder.CreateICmpEQ(CXI, LoadVal1); - CGF.EmitStoreOfScalar(Cmp, CGF.MakeAddrLValue(Dest, E->getType())); + if (llvm::ConstantInt *IsWeakC = dyn_cast<llvm::ConstantInt>(IsWeak)) { + emitAtomicCmpXchgFailureSet(CGF, E, IsWeakC->getZExtValue(), Dest, Ptr, + Val1, Val2, FailureOrder, Size, Align, Order); + } else { + // Create all the relevant BB's + llvm::BasicBlock *StrongBB = + CGF.createBasicBlock("cmpxchg.strong", CGF.CurFn); + llvm::BasicBlock *WeakBB = CGF.createBasicBlock("cmxchg.weak", CGF.CurFn); + llvm::BasicBlock *ContBB = + CGF.createBasicBlock("cmpxchg.continue", CGF.CurFn); + + llvm::SwitchInst *SI = CGF.Builder.CreateSwitch(IsWeak, WeakBB); + SI->addCase(CGF.Builder.getInt1(false), StrongBB); + + CGF.Builder.SetInsertPoint(StrongBB); + emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2, + FailureOrder, Size, Align, Order); + CGF.Builder.CreateBr(ContBB); + + CGF.Builder.SetInsertPoint(WeakBB); + emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2, + FailureOrder, Size, Align, Order); + CGF.Builder.CreateBr(ContBB); + + CGF.Builder.SetInsertPoint(ContBB); + } return; } - case AtomicExpr::AO__c11_atomic_load: case AtomicExpr::AO__atomic_load_n: case AtomicExpr::AO__atomic_load: { @@ -351,17 +488,18 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) { bool UseLibcall = (Size != Align || getContext().toBits(sizeChars) > MaxInlineWidthInBits); - llvm::Value *Ptr, *Order, *OrderFail = 0, *Val1 = 0, *Val2 = 0; - Ptr = EmitScalarExpr(E->getPtr()); + llvm::Value *IsWeak = nullptr, *OrderFail = nullptr, *Val1 = nullptr, + *Val2 = nullptr; + llvm::Value *Ptr = EmitScalarExpr(E->getPtr()); if (E->getOp() == AtomicExpr::AO__c11_atomic_init) { assert(!Dest && "Init does not return a value"); LValue lvalue = LValue::MakeAddr(Ptr, AtomicTy, alignChars, getContext()); EmitAtomicInit(E->getVal1(), lvalue); - return RValue::get(0); + return RValue::get(nullptr); } - Order = EmitScalarExpr(E->getOrder()); + llvm::Value *Order = EmitScalarExpr(E->getOrder()); switch (E->getOp()) { case AtomicExpr::AO__c11_atomic_init: @@ -394,9 +532,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) { else Val2 = EmitValToTemp(*this, E->getVal2()); OrderFail = EmitScalarExpr(E->getOrderFail()); - // Evaluate and discard the 'weak' argument. if (E->getNumSubExprs() == 6) - EmitScalarExpr(E->getWeak()); + IsWeak = EmitScalarExpr(E->getWeak()); break; case AtomicExpr::AO__c11_atomic_fetch_add: @@ -476,6 +613,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) { Args.add(RValue::get(EmitCastToVoidPtr(Ptr)), getContext().VoidPtrTy); std::string LibCallName; + QualType LoweredMemTy = + MemTy->isPointerType() ? 
getContext().getIntPtrType() : MemTy; QualType RetTy; bool HaveRetTy = false; switch (E->getOp()) { @@ -531,7 +670,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) { case AtomicExpr::AO__c11_atomic_fetch_add: case AtomicExpr::AO__atomic_fetch_add: LibCallName = "__atomic_fetch_add"; - AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy, + AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, LoweredMemTy, E->getExprLoc()); break; // T __atomic_fetch_and_N(T *mem, T val, int order) @@ -552,7 +691,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) { case AtomicExpr::AO__c11_atomic_fetch_sub: case AtomicExpr::AO__atomic_fetch_sub: LibCallName = "__atomic_fetch_sub"; - AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy, + AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, LoweredMemTy, E->getExprLoc()); break; // T __atomic_fetch_xor_N(T *mem, T val, int order) @@ -593,7 +732,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) { if (!RetTy->isVoidType()) return Res; if (E->getType()->isVoidType()) - return RValue::get(0); + return RValue::get(nullptr); return convertTempToRValue(Dest, E->getType(), E->getExprLoc()); } @@ -615,32 +754,32 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) { if (isa<llvm::ConstantInt>(Order)) { int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); switch (ord) { - case AO_ABI_memory_order_relaxed: - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, - llvm::Monotonic); + case AtomicExpr::AO_ABI_memory_order_relaxed: + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, + Size, Align, llvm::Monotonic); break; - case AO_ABI_memory_order_consume: - case AO_ABI_memory_order_acquire: + case AtomicExpr::AO_ABI_memory_order_consume: + case AtomicExpr::AO_ABI_memory_order_acquire: if (IsStore) break; // Avoid crashing on code with undefined behavior - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, - llvm::Acquire); + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, + Size, Align, llvm::Acquire); break; - case AO_ABI_memory_order_release: + case AtomicExpr::AO_ABI_memory_order_release: if (IsLoad) break; // Avoid crashing on code with undefined behavior - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, - llvm::Release); + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, + Size, Align, llvm::Release); break; - case AO_ABI_memory_order_acq_rel: + case AtomicExpr::AO_ABI_memory_order_acq_rel: if (IsLoad || IsStore) break; // Avoid crashing on code with undefined behavior - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, - llvm::AcquireRelease); + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, + Size, Align, llvm::AcquireRelease); break; - case AO_ABI_memory_order_seq_cst: - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, - llvm::SequentiallyConsistent); + case AtomicExpr::AO_ABI_memory_order_seq_cst: + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, + Size, Align, llvm::SequentiallyConsistent); break; default: // invalid order // We should not ever get here normally, but it's hard to @@ -648,15 +787,16 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) { break; } if (E->getType()->isVoidType()) - return RValue::get(0); + return RValue::get(nullptr); return convertTempToRValue(OrigDest, E->getType(), E->getExprLoc()); } // Long case, when Order isn't obviously constant. 
// Create all the relevant BB's - llvm::BasicBlock *MonotonicBB = 0, *AcquireBB = 0, *ReleaseBB = 0, - *AcqRelBB = 0, *SeqCstBB = 0; + llvm::BasicBlock *MonotonicBB = nullptr, *AcquireBB = nullptr, + *ReleaseBB = nullptr, *AcqRelBB = nullptr, + *SeqCstBB = nullptr; MonotonicBB = createBasicBlock("monotonic", CurFn); if (!IsStore) AcquireBB = createBasicBlock("acquire", CurFn); @@ -676,41 +816,46 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) { // Emit all the different atomics Builder.SetInsertPoint(MonotonicBB); - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, - llvm::Monotonic); + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, + Size, Align, llvm::Monotonic); Builder.CreateBr(ContBB); if (!IsStore) { Builder.SetInsertPoint(AcquireBB); - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, - llvm::Acquire); + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, + Size, Align, llvm::Acquire); Builder.CreateBr(ContBB); - SI->addCase(Builder.getInt32(1), AcquireBB); - SI->addCase(Builder.getInt32(2), AcquireBB); + SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_consume), + AcquireBB); + SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_acquire), + AcquireBB); } if (!IsLoad) { Builder.SetInsertPoint(ReleaseBB); - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, - llvm::Release); + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, + Size, Align, llvm::Release); Builder.CreateBr(ContBB); - SI->addCase(Builder.getInt32(3), ReleaseBB); + SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_release), + ReleaseBB); } if (!IsLoad && !IsStore) { Builder.SetInsertPoint(AcqRelBB); - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, - llvm::AcquireRelease); + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, + Size, Align, llvm::AcquireRelease); Builder.CreateBr(ContBB); - SI->addCase(Builder.getInt32(4), AcqRelBB); + SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_acq_rel), + AcqRelBB); } Builder.SetInsertPoint(SeqCstBB); - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align, - llvm::SequentiallyConsistent); + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, + Size, Align, llvm::SequentiallyConsistent); Builder.CreateBr(ContBB); - SI->addCase(Builder.getInt32(5), SeqCstBB); + SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_seq_cst), + SeqCstBB); // Cleanup and return Builder.SetInsertPoint(ContBB); if (E->getType()->isVoidType()) - return RValue::get(0); + return RValue::get(nullptr); return convertTempToRValue(OrigDest, E->getType(), E->getExprLoc()); } @@ -761,8 +906,8 @@ RValue CodeGenFunction::EmitAtomicLoad(LValue src, SourceLocation loc, getContext().VoidPtrTy); args.add(RValue::get(EmitCastToVoidPtr(tempAddr)), getContext().VoidPtrTy); - args.add(RValue::get(llvm::ConstantInt::get(IntTy, - AO_ABI_memory_order_seq_cst)), + args.add(RValue::get(llvm::ConstantInt::get( + IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)), getContext().IntTy); emitAtomicLibcall(*this, "__atomic_load", getContext().VoidTy, args); @@ -788,7 +933,7 @@ RValue CodeGenFunction::EmitAtomicLoad(LValue src, SourceLocation loc, // If we're ignoring an aggregate return, don't do anything. 
if (atomics.getEvaluationKind() == TEK_Aggregate && resultSlot.isIgnored()) - return RValue::getAggregate(0, false); + return RValue::getAggregate(nullptr, false); // The easiest way to do this this is to go through memory, but we // try not to in some easy cases. @@ -911,8 +1056,8 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest, bool isInit) { getContext().VoidPtrTy); args.add(RValue::get(EmitCastToVoidPtr(srcAddr)), getContext().VoidPtrTy); - args.add(RValue::get(llvm::ConstantInt::get(IntTy, - AO_ABI_memory_order_seq_cst)), + args.add(RValue::get(llvm::ConstantInt::get( + IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)), getContext().IntTy); emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args); return; diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp index 692f9a0dd63f..72fde9dc55f1 100644 --- a/lib/CodeGen/CGBlocks.cpp +++ b/lib/CodeGen/CGBlocks.cpp @@ -18,9 +18,9 @@ #include "CodeGenModule.h" #include "clang/AST/DeclObjC.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Module.h" -#include "llvm/Support/CallSite.h" #include <algorithm> #include <cstdio> @@ -30,9 +30,9 @@ using namespace CodeGen; CGBlockInfo::CGBlockInfo(const BlockDecl *block, StringRef name) : Name(name), CXXThisIndex(0), CanBeGlobal(false), NeedsCopyDispose(false), HasCXXObject(false), UsesStret(false), HasCapturedVariableLayout(false), - StructureType(0), Block(block), - DominatingIP(0) { - + StructureType(nullptr), Block(block), + DominatingIP(nullptr) { + // Skip asm prefix, if any. 'name' is usually taken directly from // the mangled name of the enclosing function. if (!name.empty() && name[0] == '\01') @@ -53,7 +53,7 @@ static llvm::Constant *buildCopyHelper(CodeGenModule &CGM, return CodeGenFunction(CGM).GenerateCopyHelperFunction(blockInfo); } -/// Build the helper function to dipose of a block. +/// Build the helper function to dispose of a block. static llvm::Constant *buildDisposeHelper(CodeGenModule &CGM, const CGBlockInfo &blockInfo) { return CodeGenFunction(CGM).GenerateDestroyHelperFunction(blockInfo); @@ -78,7 +78,13 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM, ASTContext &C = CGM.getContext(); llvm::Type *ulong = CGM.getTypes().ConvertType(C.UnsignedLongTy); - llvm::Type *i8p = CGM.getTypes().ConvertType(C.VoidPtrTy); + llvm::Type *i8p = NULL; + if (CGM.getLangOpts().OpenCL) + i8p = + llvm::Type::getInt8PtrTy( + CGM.getLLVMContext(), C.getTargetAddressSpace(LangAS::opencl_constant)); + else + i8p = CGM.getTypes().ConvertType(C.VoidPtrTy); SmallVector<llvm::Constant*, 6> elements; @@ -246,7 +252,7 @@ static bool isSafeForCXXConstantCapture(QualType type) { // Only records can be unsafe. if (!recordType) return true; - const CXXRecordDecl *record = cast<CXXRecordDecl>(recordType->getDecl()); + const auto *record = cast<CXXRecordDecl>(recordType->getDecl()); // Maintain semantics for classes with non-trivial dtors or copy ctors. if (!record->hasTrivialDestructor()) return false; @@ -269,7 +275,7 @@ static llvm::Constant *tryCaptureAsConstant(CodeGenModule &CGM, QualType type = var->getType(); // We can only do this if the variable is const. 
- if (!type.isConstQualified()) return 0; + if (!type.isConstQualified()) return nullptr; // Furthermore, in C++ we have to worry about mutable fields: // C++ [dcl.type.cv]p4: @@ -277,13 +283,13 @@ static llvm::Constant *tryCaptureAsConstant(CodeGenModule &CGM, // modified, any attempt to modify a const object during its // lifetime results in undefined behavior. if (CGM.getLangOpts().CPlusPlus && !isSafeForCXXConstantCapture(type)) - return 0; + return nullptr; // If the variable doesn't have any initializer (shouldn't this be // invalid?), it's not clear what we should do. Maybe capture as // zero? const Expr *init = var->getInit(); - if (!init) return 0; + if (!init) return nullptr; return CGM.EmitConstantInit(*var, CGF); } @@ -300,8 +306,8 @@ static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info, // The header is basically a 'struct { void *; int; int; void *; void *; }'. CharUnits ptrSize, ptrAlign, intSize, intAlign; - llvm::tie(ptrSize, ptrAlign) = C.getTypeInfoInChars(C.VoidPtrTy); - llvm::tie(intSize, intAlign) = C.getTypeInfoInChars(C.IntTy); + std::tie(ptrSize, ptrAlign) = C.getTypeInfoInChars(C.VoidPtrTy); + std::tie(intSize, intAlign) = C.getTypeInfoInChars(C.IntTy); // Are there crazy embedded platforms where this isn't true? assert(intSize <= ptrSize && "layout assumptions horribly violated"); @@ -366,15 +372,14 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, layout.push_back(BlockLayoutChunk(tinfo.second, tinfo.first, Qualifiers::OCL_None, - 0, llvmType)); + nullptr, llvmType)); } // Next, all the block captures. - for (BlockDecl::capture_const_iterator ci = block->capture_begin(), - ce = block->capture_end(); ci != ce; ++ci) { - const VarDecl *variable = ci->getVariable(); + for (const auto &CI : block->captures()) { + const VarDecl *variable = CI.getVariable(); - if (ci->isByRef()) { + if (CI.isByRef()) { // We have to copy/dispose of the __block reference. info.NeedsCopyDispose = true; @@ -387,8 +392,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, maxFieldAlign = std::max(maxFieldAlign, tinfo.second); layout.push_back(BlockLayoutChunk(tinfo.second, tinfo.first, - Qualifiers::OCL_None, - &*ci, llvmType)); + Qualifiers::OCL_None, &CI, llvmType)); continue; } @@ -422,7 +426,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, lifetime = Qualifiers::OCL_Strong; // So do types that require non-trivial copy construction. - } else if (ci->hasCopyExpr()) { + } else if (CI.hasCopyExpr()) { info.NeedsCopyDispose = true; info.HasCXXObject = true; @@ -446,7 +450,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, llvm::Type *llvmType = CGM.getTypes().ConvertTypeForMem(VT); - layout.push_back(BlockLayoutChunk(align, size, lifetime, &*ci, llvmType)); + layout.push_back(BlockLayoutChunk(align, size, lifetime, &CI, llvmType)); } // If that was everything, we're done here. @@ -581,14 +585,13 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) { // Walk through the captures (in order) and find the ones not // captured by constant. - for (BlockDecl::capture_const_iterator ci = block->capture_begin(), - ce = block->capture_end(); ci != ce; ++ci) { + for (const auto &CI : block->captures()) { // Ignore __block captures; there's nothing special in the // on-stack block that we need to do for them. - if (ci->isByRef()) continue; + if (CI.isByRef()) continue; // Ignore variables that are constant-captured. 
- const VarDecl *variable = ci->getVariable(); + const VarDecl *variable = CI.getVariable(); CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); if (capture.isConstant()) continue; @@ -664,7 +667,7 @@ void CodeGenFunction::destroyBlockInfos(CGBlockInfo *head) { CGBlockInfo *cur = head; head = cur->NextBlockInfo; delete cur; - } while (head != 0); + } while (head != nullptr); } /// Emit a block literal expression in the current function. @@ -679,7 +682,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) { } // Find the block info for this block and take ownership of it. - OwningPtr<CGBlockInfo> blockInfo; + std::unique_ptr<CGBlockInfo> blockInfo; blockInfo.reset(findAndRemoveBlockInfo(&FirstBlockInfo, blockExpr->getBlockDecl())); @@ -741,9 +744,8 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { } // Next, captured variables. - for (BlockDecl::capture_const_iterator ci = blockDecl->capture_begin(), - ce = blockDecl->capture_end(); ci != ce; ++ci) { - const VarDecl *variable = ci->getVariable(); + for (const auto &CI : blockDecl->captures()) { + const VarDecl *variable = CI.getVariable(); const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); // Ignore constant captures. @@ -761,7 +763,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // Compute the address of the thing we're going to move into the // block literal. llvm::Value *src; - if (BlockInfo && ci->isNested()) { + if (BlockInfo && CI.isNested()) { // We need to use the capture from the enclosing block. const CGBlockInfo::Capture &enclosingCapture = BlockInfo->getCapture(variable); @@ -773,15 +775,15 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { } else if (blockDecl->isConversionFromLambda()) { // The lambda capture in a lambda's conversion-to-block-pointer is // special; we'll simply emit it directly. - src = 0; + src = nullptr; } else { // Just look it up in the locals map, which will give us back a // [[type]]*. If that doesn't work, do the more elaborate DRE // emission. src = LocalDeclMap.lookup(variable); if (!src) { - DeclRefExpr declRef(const_cast<VarDecl*>(variable), - /*refersToEnclosing*/ ci->isNested(), type, + DeclRefExpr declRef(const_cast<VarDecl *>(variable), + /*refersToEnclosing*/ CI.isNested(), type, VK_LValue, SourceLocation()); src = EmitDeclRefLValue(&declRef).getAddress(); } @@ -791,9 +793,9 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // the block field. There's no need to chase the forwarding // pointer at this point, since we're building something that will // live a shorter life than the stack byref anyway. - if (ci->isByRef()) { + if (CI.isByRef()) { // Get a void* that points to the byref struct. - if (ci->isNested()) + if (CI.isNested()) src = Builder.CreateAlignedLoad(src, align.getQuantity(), "byref.capture"); else @@ -803,7 +805,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { Builder.CreateAlignedStore(src, blockField, align.getQuantity()); // If we have a copy constructor, evaluate that into the block field. - } else if (const Expr *copyExpr = ci->getCopyExpr()) { + } else if (const Expr *copyExpr = CI.getCopyExpr()) { if (blockDecl->isConversionFromLambda()) { // If we have a lambda conversion, emit the expression // directly into the block instead. 
@@ -845,13 +847,14 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { } else { // Fake up a new variable so that EmitScalarInit doesn't think // we're referring to the variable in its own initializer. - ImplicitParamDecl blockFieldPseudoVar(/*DC*/ 0, SourceLocation(), - /*name*/ 0, type); + ImplicitParamDecl blockFieldPseudoVar(getContext(), /*DC*/ nullptr, + SourceLocation(), /*name*/ nullptr, + type); // We use one of these or the other depending on whether the // reference is nested. DeclRefExpr declRef(const_cast<VarDecl*>(variable), - /*refersToEnclosing*/ ci->isNested(), type, + /*refersToEnclosing*/ CI.isNested(), type, VK_LValue, SourceLocation()); ImplicitCastExpr l2r(ImplicitCastExpr::OnStack, type, CK_LValueToRValue, @@ -862,7 +865,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { } // Activate the cleanup if layout pushed one. - if (!ci->isByRef()) { + if (!CI.isByRef()) { EHScopeStack::stable_iterator cleanup = capture.getCleanup(); if (cleanup.isValid()) ActivateCleanupBlock(cleanup, blockInfo.DominatingIP); @@ -1023,7 +1026,7 @@ CodeGenModule::GetAddrOfGlobalBlock(const BlockExpr *blockExpr, blockInfo.BlockExpression = blockExpr; // Compute information about the layout, etc., of this block. - computeBlockInfo(*this, 0, blockInfo); + computeBlockInfo(*this, nullptr, blockInfo); // Using that metadata, generate the actual block function. llvm::Constant *blockFn; @@ -1097,7 +1100,7 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, // to be local to this function as well, in case they're directly // referenced in a block. for (DeclMapTy::const_iterator i = ldm.begin(), e = ldm.end(); i != e; ++i) { - const VarDecl *var = dyn_cast<VarDecl>(i->first); + const auto *var = dyn_cast<VarDecl>(i->first); if (var && !var->hasLocalStorage()) LocalDeclMap[var] = i->second; } @@ -1112,35 +1115,32 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, QualType selfTy = getContext().VoidPtrTy; IdentifierInfo *II = &CGM.getContext().Idents.get(".block_descriptor"); - ImplicitParamDecl selfDecl(const_cast<BlockDecl*>(blockDecl), + ImplicitParamDecl selfDecl(getContext(), const_cast<BlockDecl*>(blockDecl), SourceLocation(), II, selfTy); args.push_back(&selfDecl); // Now add the rest of the parameters. - for (BlockDecl::param_const_iterator i = blockDecl->param_begin(), - e = blockDecl->param_end(); i != e; ++i) - args.push_back(*i); + for (auto i : blockDecl->params()) + args.push_back(i); // Create the function declaration. 
const FunctionProtoType *fnType = blockInfo.getBlockExpr()->getFunctionType(); - const CGFunctionInfo &fnInfo = - CGM.getTypes().arrangeFunctionDeclaration(fnType->getResultType(), args, - fnType->getExtInfo(), - fnType->isVariadic()); - if (CGM.ReturnTypeUsesSRet(fnInfo)) + const CGFunctionInfo &fnInfo = CGM.getTypes().arrangeFreeFunctionDeclaration( + fnType->getReturnType(), args, fnType->getExtInfo(), + fnType->isVariadic()); + if (CGM.ReturnSlotInterferesWithArgs(fnInfo)) blockInfo.UsesStret = true; llvm::FunctionType *fnLLVMType = CGM.getTypes().GetFunctionType(fnInfo); - MangleBuffer name; - CGM.getBlockMangledName(GD, name, blockDecl); - llvm::Function *fn = - llvm::Function::Create(fnLLVMType, llvm::GlobalValue::InternalLinkage, - name.getString(), &CGM.getModule()); + StringRef name = CGM.getBlockMangledName(GD, blockDecl); + llvm::Function *fn = llvm::Function::Create( + fnLLVMType, llvm::GlobalValue::InternalLinkage, name, &CGM.getModule()); CGM.SetInternalFunctionAttributes(blockDecl, fn, fnInfo); // Begin generating the function. - StartFunction(blockDecl, fnType->getResultType(), fn, fnInfo, args, + StartFunction(blockDecl, fnType->getReturnType(), fn, fnInfo, args, + blockDecl->getLocation(), blockInfo.getBlockExpr()->getBody()->getLocStart()); // Okay. Undo some of what StartFunction did. @@ -1177,9 +1177,8 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, } // Also force all the constant captures. - for (BlockDecl::capture_const_iterator ci = blockDecl->capture_begin(), - ce = blockDecl->capture_end(); ci != ce; ++ci) { - const VarDecl *variable = ci->getVariable(); + for (const auto &CI : blockDecl->captures()) { + const VarDecl *variable = CI.getVariable(); const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); if (!capture.isConstant()) continue; @@ -1201,8 +1200,14 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, if (IsLambdaConversionToBlock) EmitLambdaBlockInvokeBody(); - else + else { + PGO.assignRegionCounters(blockDecl, fn); + RegionCounter Cnt = getPGORegionCounter(blockDecl->getBody()); + Cnt.beginRegion(Builder); EmitStmt(blockDecl->getBody()); + PGO.emitInstrumentationData(); + PGO.destroyRegionCounters(); + } // Remember where we were... llvm::BasicBlock *resume = Builder.GetInsertBlock(); @@ -1214,9 +1219,8 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, // Emit debug information for all the DeclRefExprs. // FIXME: also for 'this' if (CGDebugInfo *DI = getDebugInfo()) { - for (BlockDecl::capture_const_iterator ci = blockDecl->capture_begin(), - ce = blockDecl->capture_end(); ci != ce; ++ci) { - const VarDecl *variable = ci->getVariable(); + for (const auto &CI : blockDecl->captures()) { + const VarDecl *variable = CI.getVariable(); DI->EmitLocation(Builder, variable->getLocation()); if (CGM.getCodeGenOpts().getDebugInfo() @@ -1238,7 +1242,7 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, } // And resume where we left off. 
- if (resume == 0) + if (resume == nullptr) Builder.ClearInsertionPoint(); else Builder.SetInsertPoint(resume); @@ -1280,15 +1284,15 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { ASTContext &C = getContext(); FunctionArgList args; - ImplicitParamDecl dstDecl(0, SourceLocation(), 0, C.VoidPtrTy); + ImplicitParamDecl dstDecl(getContext(), nullptr, SourceLocation(), nullptr, + C.VoidPtrTy); args.push_back(&dstDecl); - ImplicitParamDecl srcDecl(0, SourceLocation(), 0, C.VoidPtrTy); + ImplicitParamDecl srcDecl(getContext(), nullptr, SourceLocation(), nullptr, + C.VoidPtrTy); args.push_back(&srcDecl); - const CGFunctionInfo &FI = - CGM.getTypes().arrangeFunctionDeclaration(C.VoidTy, args, - FunctionType::ExtInfo(), - /*variadic*/ false); + const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( + C.VoidTy, args, FunctionType::ExtInfo(), /*variadic=*/false); // FIXME: it would be nice if these were mergeable with things with // identical semantics. @@ -1304,13 +1308,13 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { FunctionDecl *FD = FunctionDecl::Create(C, C.getTranslationUnitDecl(), SourceLocation(), - SourceLocation(), II, C.VoidTy, 0, - SC_Static, + SourceLocation(), II, C.VoidTy, + nullptr, SC_Static, false, false); // Create a scope with an artificial location for the body of this function. ArtificialLocation AL(*this, Builder); - StartFunction(FD, C.VoidTy, Fn, FI, args, SourceLocation()); + StartFunction(FD, C.VoidTy, Fn, FI, args); AL.Emit(); llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo(); @@ -1325,25 +1329,24 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { const BlockDecl *blockDecl = blockInfo.getBlockDecl(); - for (BlockDecl::capture_const_iterator ci = blockDecl->capture_begin(), - ce = blockDecl->capture_end(); ci != ce; ++ci) { - const VarDecl *variable = ci->getVariable(); + for (const auto &CI : blockDecl->captures()) { + const VarDecl *variable = CI.getVariable(); QualType type = variable->getType(); const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); if (capture.isConstant()) continue; - const Expr *copyExpr = ci->getCopyExpr(); + const Expr *copyExpr = CI.getCopyExpr(); BlockFieldFlags flags; bool useARCWeakCopy = false; bool useARCStrongCopy = false; if (copyExpr) { - assert(!ci->isByRef()); + assert(!CI.isByRef()); // don't bother computing flags - } else if (ci->isByRef()) { + } else if (CI.isByRef()) { flags = BLOCK_FIELD_IS_BYREF; if (type.isObjCGCWeak()) flags |= BLOCK_FIELD_IS_WEAK; @@ -1400,7 +1403,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { // storeStrong doesn't over-release) and then call storeStrong. // This is a workaround to not having an initStrong call. 
if (CGM.getCodeGenOpts().OptimizationLevel == 0) { - llvm::PointerType *ty = cast<llvm::PointerType>(srcValue->getType()); + auto *ty = cast<llvm::PointerType>(srcValue->getType()); llvm::Value *null = llvm::ConstantPointerNull::get(ty); Builder.CreateStore(null, dstField); EmitARCStoreStrongCall(dstField, srcValue, true); @@ -1423,7 +1426,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { }; bool copyCanThrow = false; - if (ci->isByRef() && variable->getType()->getAsCXXRecordDecl()) { + if (CI.isByRef() && variable->getType()->getAsCXXRecordDecl()) { const Expr *copyExpr = CGM.getContext().getBlockVarCopyInits(variable); if (copyExpr) { @@ -1457,13 +1460,12 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { ASTContext &C = getContext(); FunctionArgList args; - ImplicitParamDecl srcDecl(0, SourceLocation(), 0, C.VoidPtrTy); + ImplicitParamDecl srcDecl(getContext(), nullptr, SourceLocation(), nullptr, + C.VoidPtrTy); args.push_back(&srcDecl); - const CGFunctionInfo &FI = - CGM.getTypes().arrangeFunctionDeclaration(C.VoidTy, args, - FunctionType::ExtInfo(), - /*variadic*/ false); + const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( + C.VoidTy, args, FunctionType::ExtInfo(), /*variadic=*/false); // FIXME: We'd like to put these into a mergable by content, with // internal linkage. @@ -1478,12 +1480,12 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { FunctionDecl *FD = FunctionDecl::Create(C, C.getTranslationUnitDecl(), SourceLocation(), - SourceLocation(), II, C.VoidTy, 0, - SC_Static, + SourceLocation(), II, C.VoidTy, + nullptr, SC_Static, false, false); // Create a scope with an artificial location for the body of this function. ArtificialLocation AL(*this, Builder); - StartFunction(FD, C.VoidTy, Fn, FI, args, SourceLocation()); + StartFunction(FD, C.VoidTy, Fn, FI, args); AL.Emit(); llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo(); @@ -1496,21 +1498,20 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { CodeGenFunction::RunCleanupsScope cleanups(*this); - for (BlockDecl::capture_const_iterator ci = blockDecl->capture_begin(), - ce = blockDecl->capture_end(); ci != ce; ++ci) { - const VarDecl *variable = ci->getVariable(); + for (const auto &CI : blockDecl->captures()) { + const VarDecl *variable = CI.getVariable(); QualType type = variable->getType(); const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); if (capture.isConstant()) continue; BlockFieldFlags flags; - const CXXDestructorDecl *dtor = 0; + const CXXDestructorDecl *dtor = nullptr; bool useARCWeakDestroy = false; bool useARCStrongDestroy = false; - if (ci->isByRef()) { + if (CI.isByRef()) { flags = BLOCK_FIELD_IS_BYREF; if (type.isObjCGCWeak()) flags |= BLOCK_FIELD_IS_WEAK; @@ -1587,7 +1588,7 @@ public: : ByrefHelpers(alignment), Flags(flags) {} void emitCopy(CodeGenFunction &CGF, llvm::Value *destField, - llvm::Value *srcField) { + llvm::Value *srcField) override { destField = CGF.Builder.CreateBitCast(destField, CGF.VoidPtrTy); srcField = CGF.Builder.CreateBitCast(srcField, CGF.VoidPtrPtrTy); @@ -1602,14 +1603,14 @@ public: CGF.EmitNounwindRuntimeCall(fn, args); } - void emitDispose(CodeGenFunction &CGF, llvm::Value *field) { + void emitDispose(CodeGenFunction &CGF, llvm::Value *field) override { field = CGF.Builder.CreateBitCast(field, CGF.Int8PtrTy->getPointerTo(0)); llvm::Value *value = CGF.Builder.CreateLoad(field); CGF.BuildBlockRelease(value, 
Flags | BLOCK_BYREF_CALLER); } - void profileImpl(llvm::FoldingSetNodeID &id) const { + void profileImpl(llvm::FoldingSetNodeID &id) const override { id.AddInteger(Flags.getBitMask()); } }; @@ -1620,15 +1621,15 @@ public: ARCWeakByrefHelpers(CharUnits alignment) : ByrefHelpers(alignment) {} void emitCopy(CodeGenFunction &CGF, llvm::Value *destField, - llvm::Value *srcField) { + llvm::Value *srcField) override { CGF.EmitARCMoveWeak(destField, srcField); } - void emitDispose(CodeGenFunction &CGF, llvm::Value *field) { + void emitDispose(CodeGenFunction &CGF, llvm::Value *field) override { CGF.EmitARCDestroyWeak(field); } - void profileImpl(llvm::FoldingSetNodeID &id) const { + void profileImpl(llvm::FoldingSetNodeID &id) const override { // 0 is distinguishable from all pointers and byref flags id.AddInteger(0); } @@ -1641,7 +1642,7 @@ public: ARCStrongByrefHelpers(CharUnits alignment) : ByrefHelpers(alignment) {} void emitCopy(CodeGenFunction &CGF, llvm::Value *destField, - llvm::Value *srcField) { + llvm::Value *srcField) override { // Do a "move" by copying the value and then zeroing out the old // variable. @@ -1665,11 +1666,11 @@ public: store->setAlignment(Alignment.getQuantity()); } - void emitDispose(CodeGenFunction &CGF, llvm::Value *field) { + void emitDispose(CodeGenFunction &CGF, llvm::Value *field) override { CGF.EmitARCDestroyStrong(field, ARCImpreciseLifetime); } - void profileImpl(llvm::FoldingSetNodeID &id) const { + void profileImpl(llvm::FoldingSetNodeID &id) const override { // 1 is distinguishable from all pointers and byref flags id.AddInteger(1); } @@ -1682,7 +1683,7 @@ public: ARCStrongBlockByrefHelpers(CharUnits alignment) : ByrefHelpers(alignment) {} void emitCopy(CodeGenFunction &CGF, llvm::Value *destField, - llvm::Value *srcField) { + llvm::Value *srcField) override { // Do the copy with objc_retainBlock; that's all that // _Block_object_assign would do anyway, and we'd have to pass the // right arguments to make sure it doesn't get no-op'ed. 
@@ -1695,11 +1696,11 @@ public: store->setAlignment(Alignment.getQuantity()); } - void emitDispose(CodeGenFunction &CGF, llvm::Value *field) { + void emitDispose(CodeGenFunction &CGF, llvm::Value *field) override { CGF.EmitARCDestroyStrong(field, ARCImpreciseLifetime); } - void profileImpl(llvm::FoldingSetNodeID &id) const { + void profileImpl(llvm::FoldingSetNodeID &id) const override { // 2 is distinguishable from all pointers and byref flags id.AddInteger(2); } @@ -1716,20 +1717,20 @@ public: const Expr *copyExpr) : ByrefHelpers(alignment), VarType(type), CopyExpr(copyExpr) {} - bool needsCopy() const { return CopyExpr != 0; } + bool needsCopy() const override { return CopyExpr != nullptr; } void emitCopy(CodeGenFunction &CGF, llvm::Value *destField, - llvm::Value *srcField) { + llvm::Value *srcField) override { if (!CopyExpr) return; CGF.EmitSynthesizedCXXCopyCtor(destField, srcField, CopyExpr); } - void emitDispose(CodeGenFunction &CGF, llvm::Value *field) { + void emitDispose(CodeGenFunction &CGF, llvm::Value *field) override { EHScopeStack::stable_iterator cleanupDepth = CGF.EHStack.stable_begin(); CGF.PushDestructorCleanup(VarType, field); CGF.PopCleanupBlocks(cleanupDepth); } - void profileImpl(llvm::FoldingSetNodeID &id) const { + void profileImpl(llvm::FoldingSetNodeID &id) const override { id.AddPointer(VarType.getCanonicalType().getAsOpaquePtr()); } }; @@ -1745,16 +1746,16 @@ generateByrefCopyHelper(CodeGenFunction &CGF, QualType R = Context.VoidTy; FunctionArgList args; - ImplicitParamDecl dst(0, SourceLocation(), 0, Context.VoidPtrTy); + ImplicitParamDecl dst(CGF.getContext(), nullptr, SourceLocation(), nullptr, + Context.VoidPtrTy); args.push_back(&dst); - ImplicitParamDecl src(0, SourceLocation(), 0, Context.VoidPtrTy); + ImplicitParamDecl src(CGF.getContext(), nullptr, SourceLocation(), nullptr, + Context.VoidPtrTy); args.push_back(&src); - const CGFunctionInfo &FI = - CGF.CGM.getTypes().arrangeFunctionDeclaration(R, args, - FunctionType::ExtInfo(), - /*variadic*/ false); + const CGFunctionInfo &FI = CGF.CGM.getTypes().arrangeFreeFunctionDeclaration( + R, args, FunctionType::ExtInfo(), /*variadic=*/false); CodeGenTypes &Types = CGF.CGM.getTypes(); llvm::FunctionType *LTy = Types.GetFunctionType(FI); @@ -1771,11 +1772,11 @@ generateByrefCopyHelper(CodeGenFunction &CGF, FunctionDecl *FD = FunctionDecl::Create(Context, Context.getTranslationUnitDecl(), SourceLocation(), - SourceLocation(), II, R, 0, + SourceLocation(), II, R, nullptr, SC_Static, false, false); - CGF.StartFunction(FD, R, Fn, FI, args, SourceLocation()); + CGF.StartFunction(FD, R, Fn, FI, args); if (byrefInfo.needsCopy()) { llvm::Type *byrefPtrType = byrefType.getPointerTo(0); @@ -1819,13 +1820,12 @@ generateByrefDisposeHelper(CodeGenFunction &CGF, QualType R = Context.VoidTy; FunctionArgList args; - ImplicitParamDecl src(0, SourceLocation(), 0, Context.VoidPtrTy); + ImplicitParamDecl src(CGF.getContext(), nullptr, SourceLocation(), nullptr, + Context.VoidPtrTy); args.push_back(&src); - const CGFunctionInfo &FI = - CGF.CGM.getTypes().arrangeFunctionDeclaration(R, args, - FunctionType::ExtInfo(), - /*variadic*/ false); + const CGFunctionInfo &FI = CGF.CGM.getTypes().arrangeFreeFunctionDeclaration( + R, args, FunctionType::ExtInfo(), /*variadic=*/false); CodeGenTypes &Types = CGF.CGM.getTypes(); llvm::FunctionType *LTy = Types.GetFunctionType(FI); @@ -1843,10 +1843,10 @@ generateByrefDisposeHelper(CodeGenFunction &CGF, FunctionDecl *FD = FunctionDecl::Create(Context, Context.getTranslationUnitDecl(), 
SourceLocation(), - SourceLocation(), II, R, 0, + SourceLocation(), II, R, nullptr, SC_Static, false, false); - CGF.StartFunction(FD, R, Fn, FI, args, SourceLocation()); + CGF.StartFunction(FD, R, Fn, FI, args); if (byrefInfo.needsDispose()) { llvm::Value *V = CGF.GetAddrOfLocalVar(&src); @@ -1913,7 +1913,7 @@ CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType, if (const CXXRecordDecl *record = type->getAsCXXRecordDecl()) { const Expr *copyExpr = CGM.getContext().getBlockVarCopyInits(&var); - if (!copyExpr && record->hasTrivialDestructor()) return 0; + if (!copyExpr && record->hasTrivialDestructor()) return nullptr; CXXByrefHelpers byrefInfo(emission.Alignment, type, copyExpr); return ::buildByrefHelpers(CGM, byrefType, byrefValueIndex, byrefInfo); @@ -1921,7 +1921,7 @@ CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType, // Otherwise, if we don't have a retainable type, there's nothing to do. // that the runtime does extra copies. - if (!type->isObjCRetainableType()) return 0; + if (!type->isObjCRetainableType()) return nullptr; Qualifiers qs = type.getQualifiers(); @@ -1935,7 +1935,7 @@ CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType, // These are just bits as far as the runtime is concerned. case Qualifiers::OCL_ExplicitNone: case Qualifiers::OCL_Autoreleasing: - return 0; + return nullptr; // Tell the runtime that this is ARC __weak, called by the // byref routines. @@ -1969,7 +1969,7 @@ CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType, type->isObjCObjectPointerType()) { flags |= BLOCK_FIELD_IS_OBJECT; } else { - return 0; + return nullptr; } if (type.isObjCGCWeak()) @@ -2218,7 +2218,7 @@ namespace { llvm::Value *Addr; CallBlockRelease(llvm::Value *Addr) : Addr(Addr) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { // Should we be passing FIELD_IS_WEAK here? 
CGF.BuildBlockRelease(Addr, BLOCK_FIELD_IS_BYREF); } @@ -2242,9 +2242,8 @@ static void configureBlocksRuntimeObject(CodeGenModule &CGM, llvm::Constant *C) { if (!CGM.getLangOpts().BlocksRuntimeOptional) return; - llvm::GlobalValue *GV = cast<llvm::GlobalValue>(C->stripPointerCasts()); - if (GV->isDeclaration() && - GV->getLinkage() == llvm::GlobalValue::ExternalLinkage) + auto *GV = cast<llvm::GlobalValue>(C->stripPointerCasts()); + if (GV->isDeclaration() && GV->hasExternalLinkage()) GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); } @@ -2277,7 +2276,8 @@ llvm::Constant *CodeGenModule::getNSConcreteGlobalBlock() { return NSConcreteGlobalBlock; NSConcreteGlobalBlock = GetOrCreateLLVMGlobal("_NSConcreteGlobalBlock", - Int8PtrTy->getPointerTo(), 0); + Int8PtrTy->getPointerTo(), + nullptr); configureBlocksRuntimeObject(*this, NSConcreteGlobalBlock); return NSConcreteGlobalBlock; } @@ -2287,7 +2287,8 @@ llvm::Constant *CodeGenModule::getNSConcreteStackBlock() { return NSConcreteStackBlock; NSConcreteStackBlock = GetOrCreateLLVMGlobal("_NSConcreteStackBlock", - Int8PtrTy->getPointerTo(), 0); + Int8PtrTy->getPointerTo(), + nullptr); configureBlocksRuntimeObject(*this, NSConcreteStackBlock); return NSConcreteStackBlock; } diff --git a/lib/CodeGen/CGBlocks.h b/lib/CodeGen/CGBlocks.h index 020638a55810..0031e32c9daf 100644 --- a/lib/CodeGen/CGBlocks.h +++ b/lib/CodeGen/CGBlocks.h @@ -28,15 +28,15 @@ #include "llvm/IR/Module.h" namespace llvm { - class Module; - class Constant; - class Function; - class GlobalValue; - class DataLayout; - class FunctionType; - class PointerType; - class Value; - class LLVMContext; +class Module; +class Constant; +class Function; +class GlobalValue; +class DataLayout; +class FunctionType; +class PointerType; +class Value; +class LLVMContext; } namespace clang { diff --git a/lib/CodeGen/CGBuilder.h b/lib/CodeGen/CGBuilder.h index fd21e7e26a3b..f113b970b7b7 100644 --- a/lib/CodeGen/CGBuilder.h +++ b/lib/CodeGen/CGBuilder.h @@ -15,12 +15,39 @@ namespace clang { namespace CodeGen { +class CodeGenFunction; + +/// \brief This is an IRBuilder insertion helper that forwards to +/// CodeGenFunction::InsertHelper, which adds nesessary metadata to +/// instructions. +template <bool PreserveNames> +class CGBuilderInserter + : protected llvm::IRBuilderDefaultInserter<PreserveNames> { +public: + CGBuilderInserter() : CGF(nullptr) {} + explicit CGBuilderInserter(CodeGenFunction *CGF) : CGF(CGF) {} + +protected: + /// \brief This forwards to CodeGenFunction::InsertHelper. + void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, + llvm::BasicBlock *BB, + llvm::BasicBlock::iterator InsertPt) const; +private: + void operator=(const CGBuilderInserter &) LLVM_DELETED_FUNCTION; + + CodeGenFunction *CGF; +}; + // Don't preserve names on values in an optimized build. 
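// Editorial sketch (not part of the patch): the CGBuilderInserter declared
// above hooks the IRBuilder so that every instruction the frontend creates is
// routed through CodeGenFunction::InsertHelper, which can attach extra
// metadata. The general shape of such a custom inserter, with a hypothetical
// name (CountingInserter) and no Clang dependencies, looks roughly like this:
template <bool PreserveNamesFlag>
class CountingInserter
    : protected llvm::IRBuilderDefaultInserter<PreserveNamesFlag> {
public:
  CountingInserter() : NumInserted(0) {}
  mutable unsigned NumInserted;  // how many instructions were emitted

protected:
  void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name,
                    llvm::BasicBlock *BB,
                    llvm::BasicBlock::iterator InsertPt) const {
    // Do the normal insertion first, then run the extra bookkeeping.
    llvm::IRBuilderDefaultInserter<PreserveNamesFlag>::InsertHelper(
        I, Name, BB, InsertPt);
    ++NumInserted;
  }
};
// A builder that uses it, mirroring the CGBuilderTy typedef below:
typedef llvm::IRBuilder<true, llvm::ConstantFolder, CountingInserter<true>>
    IllustrativeBuilderTy;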
#ifdef NDEBUG -typedef llvm::IRBuilder<false> CGBuilderTy; +#define PreserveNames false #else -typedef llvm::IRBuilder<> CGBuilderTy; +#define PreserveNames true #endif +typedef CGBuilderInserter<PreserveNames> CGBuilderInserterTy; +typedef llvm::IRBuilder<PreserveNames, llvm::ConstantFolder, + CGBuilderInserterTy> CGBuilderTy; +#undef PreserveNames } // end namespace CodeGen } // end namespace clang diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 7726ad309d8a..4f68b347dbf0 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -212,11 +212,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, default: break; // Handle intrinsics and libm functions below. case Builtin::BI__builtin___CFStringMakeConstantString: case Builtin::BI__builtin___NSStringMakeConstantString: - return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0)); + return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr)); case Builtin::BI__builtin_stdarg_start: case Builtin::BI__builtin_va_start: + case Builtin::BI__va_start: case Builtin::BI__builtin_va_end: { - Value *ArgValue = EmitVAListRef(E->getArg(0)); + Value *ArgValue = (BuiltinID == Builtin::BI__va_start) + ? EmitScalarExpr(E->getArg(0)) + : EmitVAListRef(E->getArg(0)); llvm::Type *DestType = Int8PtrTy; if (ArgValue->getType() != DestType) ArgValue = Builder.CreateBitCast(ArgValue, DestType, @@ -429,6 +432,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); return RValue::get(Builder.CreateCall(F)); } + case Builtin::BI__builtin___clear_cache: { + Value *Begin = EmitScalarExpr(E->getArg(0)); + Value *End = EmitScalarExpr(E->getArg(1)); + Value *F = CGM.getIntrinsic(Intrinsic::clear_cache); + return RValue::get(Builder.CreateCall2(F, Begin, End)); + } case Builtin::BI__builtin_trap: { Value *F = CGM.getIntrinsic(Intrinsic::trap); return RValue::get(Builder.CreateCall(F)); @@ -438,17 +447,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Builder.CreateCall(F)); } case Builtin::BI__builtin_unreachable: { - if (SanOpts->Unreachable) + if (SanOpts->Unreachable) { + SanitizerScope SanScope(this); EmitCheck(Builder.getFalse(), "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()), ArrayRef<llvm::Value *>(), CRK_Unrecoverable); - else + } else Builder.CreateUnreachable(); // We do need to preserve an insertion point. EmitBlock(createBasicBlock("unreachable.cont")); - return RValue::get(0); + return RValue::get(nullptr); } case Builtin::BI__builtin_powi: @@ -781,7 +791,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // We do need to preserve an insertion point. EmitBlock(createBasicBlock("builtin_eh_return.cont")); - return RValue::get(0); + return RValue::get(nullptr); } case Builtin::BI__builtin_unwind_init: { Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); @@ -847,7 +857,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // We do need to preserve an insertion point. 
EmitBlock(createBasicBlock("longjmp.cont")); - return RValue::get(0); + return RValue::get(nullptr); } case Builtin::BI__sync_fetch_and_add: case Builtin::BI__sync_fetch_and_sub: @@ -964,7 +974,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType); Value *Result = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2], + llvm::SequentiallyConsistent, llvm::SequentiallyConsistent); + Result = Builder.CreateExtractValue(Result, 0); Result = EmitFromInt(*this, Result, T, ValueType); return RValue::get(Result); } @@ -988,10 +1000,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType); Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType); - Value *OldVal = Args[1]; - Value *PrevVal = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2], - llvm::SequentiallyConsistent); - Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal); + Value *Pair = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2], + llvm::SequentiallyConsistent, + llvm::SequentiallyConsistent); + Value *Result = Builder.CreateExtractValue(Pair, 1); // zext bool to int. Result = Builder.CreateZExt(Result, ConvertType(E->getType())); return RValue::get(Result); @@ -1026,7 +1038,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr); Store->setAlignment(StoreSize.getQuantity()); Store->setAtomic(llvm::Release); - return RValue::get(0); + return RValue::get(nullptr); } case Builtin::BI__sync_synchronize: { @@ -1038,7 +1050,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // to use it with non-atomic loads and stores to get acquire/release // semantics. Builder.CreateFence(llvm::SequentiallyConsistent); - return RValue::get(0); + return RValue::get(nullptr); } case Builtin::BI__c11_atomic_is_lock_free: @@ -1079,7 +1091,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Value *Order = EmitScalarExpr(E->getArg(1)); if (isa<llvm::ConstantInt>(Order)) { int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); - AtomicRMWInst *Result = 0; + AtomicRMWInst *Result = nullptr; switch (ord) { case 0: // memory_order_relaxed default: // invalid order @@ -1179,7 +1191,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Store->setOrdering(llvm::SequentiallyConsistent); break; } - return RValue::get(0); + return RValue::get(nullptr); } llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); @@ -1209,7 +1221,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, SI->addCase(Builder.getInt32(5), BBs[2]); Builder.SetInsertPoint(ContBB); - return RValue::get(0); + return RValue::get(nullptr); } case Builtin::BI__atomic_thread_fence: @@ -1243,7 +1255,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Builder.CreateFence(llvm::SequentiallyConsistent, Scope); break; } - return RValue::get(0); + return RValue::get(nullptr); } llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; @@ -1278,7 +1290,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, SI->addCase(Builder.getInt32(5), SeqCstBB); Builder.SetInsertPoint(ContBB); - return RValue::get(0); + return RValue::get(nullptr); } // Library functions with special handling. 
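// Editorial sketch (not part of the patch): the cmpxchg changes above follow
// the updated IRBuilder API -- CreateAtomicCmpXchg now takes both a success
// and a failure ordering and returns a { old-value, i1 success } pair, so the
// builtin emitters pull out field 0 (__sync_val_compare_and_swap, and the
// _Interlocked* helpers further down) or field 1
// (__sync_bool_compare_and_swap) with CreateExtractValue. A minimal
// illustration with hypothetical helper names, not the exact Clang code:
#include "llvm/IR/IRBuilder.h"

static llvm::Value *emitValCAS(llvm::IRBuilder<> &B, llvm::Value *Ptr,
                               llvm::Value *Expected, llvm::Value *Desired) {
  llvm::Value *Pair =
      B.CreateAtomicCmpXchg(Ptr, Expected, Desired,
                            llvm::SequentiallyConsistent,   // success ordering
                            llvm::SequentiallyConsistent);  // failure ordering
  return B.CreateExtractValue(Pair, 0);  // the value that was in memory
}

static llvm::Value *emitBoolCAS(llvm::IRBuilder<> &B, llvm::Value *Ptr,
                                llvm::Value *Expected, llvm::Value *Desired) {
  llvm::Value *Pair =
      B.CreateAtomicCmpXchg(Ptr, Expected, Desired,
                            llvm::SequentiallyConsistent,
                            llvm::SequentiallyConsistent);
  llvm::Value *Ok = B.CreateExtractValue(Pair, 1);  // i1 success flag
  return B.CreateZExt(Ok, B.getInt32Ty());          // zext bool to int
}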
@@ -1311,7 +1323,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::Type *ArgType = Base->getType(); Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType); return RValue::get(Builder.CreateCall2(F, Base, Exponent)); - break; } case Builtin::BIfma: @@ -1498,8 +1509,84 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, } case Builtin::BI__builtin_addressof: return RValue::get(EmitLValue(E->getArg(0)).getAddress()); + case Builtin::BI__builtin_operator_new: + return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), + E->getArg(0), false); + case Builtin::BI__builtin_operator_delete: + return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), + E->getArg(0), true); case Builtin::BI__noop: - return RValue::get(0); + // __noop always evaluates to an integer literal zero. + return RValue::get(ConstantInt::get(IntTy, 0)); + case Builtin::BI__assume: + // Until LLVM supports assumptions at the IR level, this becomes nothing. + return RValue::get(nullptr); + case Builtin::BI_InterlockedExchange: + case Builtin::BI_InterlockedExchangePointer: + return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); + case Builtin::BI_InterlockedCompareExchangePointer: { + llvm::Type *RTy; + llvm::IntegerType *IntType = + IntegerType::get(getLLVMContext(), + getContext().getTypeSize(E->getType())); + llvm::Type *IntPtrType = IntType->getPointerTo(); + + llvm::Value *Destination = + Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType); + + llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); + RTy = Exchange->getType(); + Exchange = Builder.CreatePtrToInt(Exchange, IntType); + + llvm::Value *Comparand = + Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); + + auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, + SequentiallyConsistent, + SequentiallyConsistent); + Result->setVolatile(true); + + return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, + 0), + RTy)); + } + case Builtin::BI_InterlockedCompareExchange: { + AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg( + EmitScalarExpr(E->getArg(0)), + EmitScalarExpr(E->getArg(2)), + EmitScalarExpr(E->getArg(1)), + SequentiallyConsistent, + SequentiallyConsistent); + CXI->setVolatile(true); + return RValue::get(Builder.CreateExtractValue(CXI, 0)); + } + case Builtin::BI_InterlockedIncrement: { + AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( + AtomicRMWInst::Add, + EmitScalarExpr(E->getArg(0)), + ConstantInt::get(Int32Ty, 1), + llvm::SequentiallyConsistent); + RMWI->setVolatile(true); + return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(Int32Ty, 1))); + } + case Builtin::BI_InterlockedDecrement: { + AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( + AtomicRMWInst::Sub, + EmitScalarExpr(E->getArg(0)), + ConstantInt::get(Int32Ty, 1), + llvm::SequentiallyConsistent); + RMWI->setVolatile(true); + return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(Int32Ty, 1))); + } + case Builtin::BI_InterlockedExchangeAdd: { + AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( + AtomicRMWInst::Add, + EmitScalarExpr(E->getArg(0)), + EmitScalarExpr(E->getArg(1)), + llvm::SequentiallyConsistent); + RMWI->setVolatile(true); + return RValue::get(RMWI); + } } // If this is an alias for a lib function (e.g. 
__builtin_sin), emit @@ -1518,8 +1605,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, const char *Name = getContext().BuiltinInfo.GetName(BuiltinID); Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; if (const char *Prefix = - llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) + llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) { IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name); + // NOTE we dont need to perform a compatibility flag check here since the + // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the + // MS builtins via ALL_MS_LANGUAGES and are filtered earlier. + if (IntrinsicID == Intrinsic::not_intrinsic) + IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix, Name); + } if (IntrinsicID != Intrinsic::not_intrinsic) { SmallVector<Value*, 16> Args; @@ -1590,11 +1683,16 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { switch (getTarget().getTriple().getArch()) { - case llvm::Triple::aarch64: - return EmitAArch64BuiltinExpr(BuiltinID, E); case llvm::Triple::arm: + case llvm::Triple::armeb: case llvm::Triple::thumb: + case llvm::Triple::thumbeb: return EmitARMBuiltinExpr(BuiltinID, E); + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_be: + case llvm::Triple::arm64: + case llvm::Triple::arm64_be: + return EmitAArch64BuiltinExpr(BuiltinID, E); case llvm::Triple::x86: case llvm::Triple::x86_64: return EmitX86BuiltinExpr(BuiltinID, E); @@ -1602,8 +1700,10 @@ Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, case llvm::Triple::ppc64: case llvm::Triple::ppc64le: return EmitPPCBuiltinExpr(BuiltinID, E); + case llvm::Triple::r600: + return EmitR600BuiltinExpr(BuiltinID, E); default: - return 0; + return nullptr; } } @@ -1624,6 +1724,11 @@ static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, case NeonTypeFlags::Int64: case NeonTypeFlags::Poly64: return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad)); + case NeonTypeFlags::Poly128: + // FIXME: i128 and f128 doesn't get fully support in Clang and llvm. + // There is a lot of i128 and f128 API missing. + // so we use v16i8 to represent poly128 and get pattern matched. + return llvm::VectorType::get(CGF->Int8Ty, 16); case NeonTypeFlags::Float32: return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad)); case NeonTypeFlags::Float64: @@ -1749,2182 +1854,1232 @@ CodeGenFunction::EmitPointerWithAlignment(const Expr *Addr) { return std::make_pair(EmitScalarExpr(Addr), Align); } -static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF, - unsigned BuiltinID, - const CallExpr *E) { - unsigned int Int = 0; - // Scalar result generated across vectors - bool AcrossVec = false; - // Extend element of one-element vector - bool ExtendEle = false; - bool OverloadInt = false; - bool OverloadCmpInt = false; - bool IsFpCmpZInt = false; - bool OverloadCvtInt = false; - bool OverloadWideInt = false; - bool OverloadNarrowInt = false; - const char *s = NULL; - - SmallVector<Value *, 4> Ops; - for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { - Ops.push_back(CGF.EmitScalarExpr(E->getArg(i))); - } - - // AArch64 scalar builtins are not overloaded, they do not have an extra - // argument that specifies the vector type, need to handle each case. 
- switch (BuiltinID) { - default: break; - case AArch64::BI__builtin_neon_vdups_lane_f32: - case AArch64::BI__builtin_neon_vdupd_lane_f64: - case AArch64::BI__builtin_neon_vdups_laneq_f32: - case AArch64::BI__builtin_neon_vdupd_laneq_f64: { - return CGF.Builder.CreateExtractElement(Ops[0], Ops[1], "vdup_lane"); - } - case AArch64::BI__builtin_neon_vdupb_lane_i8: - case AArch64::BI__builtin_neon_vduph_lane_i16: - case AArch64::BI__builtin_neon_vdups_lane_i32: - case AArch64::BI__builtin_neon_vdupd_lane_i64: - case AArch64::BI__builtin_neon_vdupb_laneq_i8: - case AArch64::BI__builtin_neon_vduph_laneq_i16: - case AArch64::BI__builtin_neon_vdups_laneq_i32: - case AArch64::BI__builtin_neon_vdupd_laneq_i64: { - // The backend treats Neon scalar types as v1ix types - // So we want to dup lane from any vector to v1ix vector - // with shufflevector - s = "vdup_lane"; - Value* SV = llvm::ConstantVector::getSplat(1, cast<ConstantInt>(Ops[1])); - Value *Result = CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], SV, s); - llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); - // AArch64 intrinsic one-element vector type cast to - // scalar type expected by the builtin - return CGF.Builder.CreateBitCast(Result, Ty, s); - } - case AArch64::BI__builtin_neon_vqdmlalh_lane_s16 : - case AArch64::BI__builtin_neon_vqdmlalh_laneq_s16 : - case AArch64::BI__builtin_neon_vqdmlals_lane_s32 : - case AArch64::BI__builtin_neon_vqdmlals_laneq_s32 : - case AArch64::BI__builtin_neon_vqdmlslh_lane_s16 : - case AArch64::BI__builtin_neon_vqdmlslh_laneq_s16 : - case AArch64::BI__builtin_neon_vqdmlsls_lane_s32 : - case AArch64::BI__builtin_neon_vqdmlsls_laneq_s32 : { - Int = Intrinsic::arm_neon_vqadds; - if (BuiltinID == AArch64::BI__builtin_neon_vqdmlslh_lane_s16 || - BuiltinID == AArch64::BI__builtin_neon_vqdmlslh_laneq_s16 || - BuiltinID == AArch64::BI__builtin_neon_vqdmlsls_lane_s32 || - BuiltinID == AArch64::BI__builtin_neon_vqdmlsls_laneq_s32) { - Int = Intrinsic::arm_neon_vqsubs; - } - // create vqdmull call with b * c[i] - llvm::Type *Ty = CGF.ConvertType(E->getArg(1)->getType()); - llvm::VectorType *OpVTy = llvm::VectorType::get(Ty, 1); - Ty = CGF.ConvertType(E->getArg(0)->getType()); - llvm::VectorType *ResVTy = llvm::VectorType::get(Ty, 1); - Value *F = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, ResVTy); - Value *V = UndefValue::get(OpVTy); - llvm::Constant *CI = ConstantInt::get(CGF.Int32Ty, 0); - SmallVector<Value *, 2> MulOps; - MulOps.push_back(Ops[1]); - MulOps.push_back(Ops[2]); - MulOps[0] = CGF.Builder.CreateInsertElement(V, MulOps[0], CI); - MulOps[1] = CGF.Builder.CreateExtractElement(MulOps[1], Ops[3], "extract"); - MulOps[1] = CGF.Builder.CreateInsertElement(V, MulOps[1], CI); - Value *MulRes = CGF.Builder.CreateCall2(F, MulOps[0], MulOps[1]); - // create vqadds call with a +/- vqdmull result - F = CGF.CGM.getIntrinsic(Int, ResVTy); - SmallVector<Value *, 2> AddOps; - AddOps.push_back(Ops[0]); - AddOps.push_back(MulRes); - V = UndefValue::get(ResVTy); - AddOps[0] = CGF.Builder.CreateInsertElement(V, AddOps[0], CI); - Value *AddRes = CGF.Builder.CreateCall2(F, AddOps[0], AddOps[1]); - return CGF.Builder.CreateBitCast(AddRes, Ty); - } - case AArch64::BI__builtin_neon_vfmas_lane_f32: - case AArch64::BI__builtin_neon_vfmas_laneq_f32: - case AArch64::BI__builtin_neon_vfmad_lane_f64: - case AArch64::BI__builtin_neon_vfmad_laneq_f64: { - llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); - Value *F = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], 
Ops[3], "extract"); - return CGF.Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); - } - // Scalar Floating-point Multiply Extended - case AArch64::BI__builtin_neon_vmulxs_f32: - case AArch64::BI__builtin_neon_vmulxd_f64: { - Int = Intrinsic::aarch64_neon_vmulx; - llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); - return CGF.EmitNeonCall(CGF.CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); - } - case AArch64::BI__builtin_neon_vmul_n_f64: { - // v1f64 vmul_n_f64 should be mapped to Neon scalar mul lane - llvm::Type *VTy = GetNeonType(&CGF, - NeonTypeFlags(NeonTypeFlags::Float64, false, false)); - Ops[0] = CGF.Builder.CreateBitCast(Ops[0], VTy); - llvm::Value *Idx = llvm::ConstantInt::get(CGF.Int32Ty, 0); - Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], Idx, "extract"); - Value *Result = CGF.Builder.CreateFMul(Ops[0], Ops[1]); - return CGF.Builder.CreateBitCast(Result, VTy); - } - case AArch64::BI__builtin_neon_vget_lane_i8: - case AArch64::BI__builtin_neon_vget_lane_i16: - case AArch64::BI__builtin_neon_vget_lane_i32: - case AArch64::BI__builtin_neon_vget_lane_i64: - case AArch64::BI__builtin_neon_vget_lane_f32: - case AArch64::BI__builtin_neon_vget_lane_f64: - case AArch64::BI__builtin_neon_vgetq_lane_i8: - case AArch64::BI__builtin_neon_vgetq_lane_i16: - case AArch64::BI__builtin_neon_vgetq_lane_i32: - case AArch64::BI__builtin_neon_vgetq_lane_i64: - case AArch64::BI__builtin_neon_vgetq_lane_f32: - case AArch64::BI__builtin_neon_vgetq_lane_f64: - return CGF.EmitARMBuiltinExpr(ARM::BI__builtin_neon_vget_lane_i8, E); - case AArch64::BI__builtin_neon_vset_lane_i8: - case AArch64::BI__builtin_neon_vset_lane_i16: - case AArch64::BI__builtin_neon_vset_lane_i32: - case AArch64::BI__builtin_neon_vset_lane_i64: - case AArch64::BI__builtin_neon_vset_lane_f32: - case AArch64::BI__builtin_neon_vset_lane_f64: - case AArch64::BI__builtin_neon_vsetq_lane_i8: - case AArch64::BI__builtin_neon_vsetq_lane_i16: - case AArch64::BI__builtin_neon_vsetq_lane_i32: - case AArch64::BI__builtin_neon_vsetq_lane_i64: - case AArch64::BI__builtin_neon_vsetq_lane_f32: - case AArch64::BI__builtin_neon_vsetq_lane_f64: - return CGF.EmitARMBuiltinExpr(ARM::BI__builtin_neon_vset_lane_i8, E); - // Crypto - case AArch64::BI__builtin_neon_vsha1h_u32: - Int = Intrinsic::arm_neon_sha1h; - s = "sha1h"; OverloadInt = true; break; - case AArch64::BI__builtin_neon_vsha1cq_u32: - Int = Intrinsic::aarch64_neon_sha1c; - s = "sha1c"; break; - case AArch64::BI__builtin_neon_vsha1pq_u32: - Int = Intrinsic::aarch64_neon_sha1p; - s = "sha1p"; break; - case AArch64::BI__builtin_neon_vsha1mq_u32: - Int = Intrinsic::aarch64_neon_sha1m; - s = "sha1m"; break; - // Scalar Add - case AArch64::BI__builtin_neon_vaddd_s64: - Int = Intrinsic::aarch64_neon_vaddds; - s = "vaddds"; break; - case AArch64::BI__builtin_neon_vaddd_u64: - Int = Intrinsic::aarch64_neon_vadddu; - s = "vadddu"; break; - // Scalar Sub - case AArch64::BI__builtin_neon_vsubd_s64: - Int = Intrinsic::aarch64_neon_vsubds; - s = "vsubds"; break; - case AArch64::BI__builtin_neon_vsubd_u64: - Int = Intrinsic::aarch64_neon_vsubdu; - s = "vsubdu"; break; - // Scalar Saturating Add - case AArch64::BI__builtin_neon_vqaddb_s8: - case AArch64::BI__builtin_neon_vqaddh_s16: - case AArch64::BI__builtin_neon_vqadds_s32: - case AArch64::BI__builtin_neon_vqaddd_s64: - Int = Intrinsic::arm_neon_vqadds; - s = "vqadds"; OverloadInt = true; break; - case AArch64::BI__builtin_neon_vqaddb_u8: - case AArch64::BI__builtin_neon_vqaddh_u16: - case AArch64::BI__builtin_neon_vqadds_u32: - case 
AArch64::BI__builtin_neon_vqaddd_u64: - Int = Intrinsic::arm_neon_vqaddu; - s = "vqaddu"; OverloadInt = true; break; - // Scalar Saturating Sub - case AArch64::BI__builtin_neon_vqsubb_s8: - case AArch64::BI__builtin_neon_vqsubh_s16: - case AArch64::BI__builtin_neon_vqsubs_s32: - case AArch64::BI__builtin_neon_vqsubd_s64: - Int = Intrinsic::arm_neon_vqsubs; - s = "vqsubs"; OverloadInt = true; break; - case AArch64::BI__builtin_neon_vqsubb_u8: - case AArch64::BI__builtin_neon_vqsubh_u16: - case AArch64::BI__builtin_neon_vqsubs_u32: - case AArch64::BI__builtin_neon_vqsubd_u64: - Int = Intrinsic::arm_neon_vqsubu; - s = "vqsubu"; OverloadInt = true; break; - // Scalar Shift Left - case AArch64::BI__builtin_neon_vshld_s64: - Int = Intrinsic::aarch64_neon_vshlds; - s = "vshlds"; break; - case AArch64::BI__builtin_neon_vshld_u64: - Int = Intrinsic::aarch64_neon_vshldu; - s = "vshldu"; break; - // Scalar Saturating Shift Left - case AArch64::BI__builtin_neon_vqshlb_s8: - case AArch64::BI__builtin_neon_vqshlh_s16: - case AArch64::BI__builtin_neon_vqshls_s32: - case AArch64::BI__builtin_neon_vqshld_s64: - Int = Intrinsic::aarch64_neon_vqshls; - s = "vqshls"; OverloadInt = true; break; - case AArch64::BI__builtin_neon_vqshlb_u8: - case AArch64::BI__builtin_neon_vqshlh_u16: - case AArch64::BI__builtin_neon_vqshls_u32: - case AArch64::BI__builtin_neon_vqshld_u64: - Int = Intrinsic::aarch64_neon_vqshlu; - s = "vqshlu"; OverloadInt = true; break; - // Scalar Rouding Shift Left - case AArch64::BI__builtin_neon_vrshld_s64: - Int = Intrinsic::aarch64_neon_vrshlds; - s = "vrshlds"; break; - case AArch64::BI__builtin_neon_vrshld_u64: - Int = Intrinsic::aarch64_neon_vrshldu; - s = "vrshldu"; break; - // Scalar Saturating Rouding Shift Left - case AArch64::BI__builtin_neon_vqrshlb_s8: - case AArch64::BI__builtin_neon_vqrshlh_s16: - case AArch64::BI__builtin_neon_vqrshls_s32: - case AArch64::BI__builtin_neon_vqrshld_s64: - Int = Intrinsic::aarch64_neon_vqrshls; - s = "vqrshls"; OverloadInt = true; break; - case AArch64::BI__builtin_neon_vqrshlb_u8: - case AArch64::BI__builtin_neon_vqrshlh_u16: - case AArch64::BI__builtin_neon_vqrshls_u32: - case AArch64::BI__builtin_neon_vqrshld_u64: - Int = Intrinsic::aarch64_neon_vqrshlu; - s = "vqrshlu"; OverloadInt = true; break; - // Scalar Reduce Pairwise Add - case AArch64::BI__builtin_neon_vpaddd_s64: - case AArch64::BI__builtin_neon_vpaddd_u64: - Int = Intrinsic::aarch64_neon_vpadd; s = "vpadd"; - break; - case AArch64::BI__builtin_neon_vpadds_f32: - Int = Intrinsic::aarch64_neon_vpfadd; s = "vpfadd"; - break; - case AArch64::BI__builtin_neon_vpaddd_f64: - Int = Intrinsic::aarch64_neon_vpfaddq; s = "vpfaddq"; - break; - // Scalar Reduce Pairwise Floating Point Max - case AArch64::BI__builtin_neon_vpmaxs_f32: - Int = Intrinsic::aarch64_neon_vpmax; s = "vpmax"; - break; - case AArch64::BI__builtin_neon_vpmaxqd_f64: - Int = Intrinsic::aarch64_neon_vpmaxq; s = "vpmaxq"; - break; - // Scalar Reduce Pairwise Floating Point Min - case AArch64::BI__builtin_neon_vpmins_f32: - Int = Intrinsic::aarch64_neon_vpmin; s = "vpmin"; - break; - case AArch64::BI__builtin_neon_vpminqd_f64: - Int = Intrinsic::aarch64_neon_vpminq; s = "vpminq"; - break; - // Scalar Reduce Pairwise Floating Point Maxnm - case AArch64::BI__builtin_neon_vpmaxnms_f32: - Int = Intrinsic::aarch64_neon_vpfmaxnm; s = "vpfmaxnm"; - break; - case AArch64::BI__builtin_neon_vpmaxnmqd_f64: - Int = Intrinsic::aarch64_neon_vpfmaxnmq; s = "vpfmaxnmq"; - break; - // Scalar Reduce Pairwise Floating Point Minnm - case 
AArch64::BI__builtin_neon_vpminnms_f32: - Int = Intrinsic::aarch64_neon_vpfminnm; s = "vpfminnm"; - break; - case AArch64::BI__builtin_neon_vpminnmqd_f64: - Int = Intrinsic::aarch64_neon_vpfminnmq; s = "vpfminnmq"; - break; - // The followings are intrinsics with scalar results generated AcrossVec vectors - case AArch64::BI__builtin_neon_vaddlv_s8: - case AArch64::BI__builtin_neon_vaddlv_s16: - case AArch64::BI__builtin_neon_vaddlvq_s8: - case AArch64::BI__builtin_neon_vaddlvq_s16: - case AArch64::BI__builtin_neon_vaddlvq_s32: - Int = Intrinsic::aarch64_neon_saddlv; - AcrossVec = true; ExtendEle = true; s = "saddlv"; break; - case AArch64::BI__builtin_neon_vaddlv_u8: - case AArch64::BI__builtin_neon_vaddlv_u16: - case AArch64::BI__builtin_neon_vaddlvq_u8: - case AArch64::BI__builtin_neon_vaddlvq_u16: - case AArch64::BI__builtin_neon_vaddlvq_u32: - Int = Intrinsic::aarch64_neon_uaddlv; - AcrossVec = true; ExtendEle = true; s = "uaddlv"; break; - case AArch64::BI__builtin_neon_vmaxv_s8: - case AArch64::BI__builtin_neon_vmaxv_s16: - case AArch64::BI__builtin_neon_vmaxvq_s8: - case AArch64::BI__builtin_neon_vmaxvq_s16: - case AArch64::BI__builtin_neon_vmaxvq_s32: - Int = Intrinsic::aarch64_neon_smaxv; - AcrossVec = true; ExtendEle = false; s = "smaxv"; break; - case AArch64::BI__builtin_neon_vmaxv_u8: - case AArch64::BI__builtin_neon_vmaxv_u16: - case AArch64::BI__builtin_neon_vmaxvq_u8: - case AArch64::BI__builtin_neon_vmaxvq_u16: - case AArch64::BI__builtin_neon_vmaxvq_u32: - Int = Intrinsic::aarch64_neon_umaxv; - AcrossVec = true; ExtendEle = false; s = "umaxv"; break; - case AArch64::BI__builtin_neon_vminv_s8: - case AArch64::BI__builtin_neon_vminv_s16: - case AArch64::BI__builtin_neon_vminvq_s8: - case AArch64::BI__builtin_neon_vminvq_s16: - case AArch64::BI__builtin_neon_vminvq_s32: - Int = Intrinsic::aarch64_neon_sminv; - AcrossVec = true; ExtendEle = false; s = "sminv"; break; - case AArch64::BI__builtin_neon_vminv_u8: - case AArch64::BI__builtin_neon_vminv_u16: - case AArch64::BI__builtin_neon_vminvq_u8: - case AArch64::BI__builtin_neon_vminvq_u16: - case AArch64::BI__builtin_neon_vminvq_u32: - Int = Intrinsic::aarch64_neon_uminv; - AcrossVec = true; ExtendEle = false; s = "uminv"; break; - case AArch64::BI__builtin_neon_vaddv_s8: - case AArch64::BI__builtin_neon_vaddv_s16: - case AArch64::BI__builtin_neon_vaddvq_s8: - case AArch64::BI__builtin_neon_vaddvq_s16: - case AArch64::BI__builtin_neon_vaddvq_s32: - case AArch64::BI__builtin_neon_vaddvq_s64: - case AArch64::BI__builtin_neon_vaddv_u8: - case AArch64::BI__builtin_neon_vaddv_u16: - case AArch64::BI__builtin_neon_vaddvq_u8: - case AArch64::BI__builtin_neon_vaddvq_u16: - case AArch64::BI__builtin_neon_vaddvq_u32: - case AArch64::BI__builtin_neon_vaddvq_u64: - case AArch64::BI__builtin_neon_vaddv_f32: - case AArch64::BI__builtin_neon_vaddvq_f32: - case AArch64::BI__builtin_neon_vaddvq_f64: - Int = Intrinsic::aarch64_neon_vaddv; - AcrossVec = true; ExtendEle = false; s = "vaddv"; break; - case AArch64::BI__builtin_neon_vmaxv_f32: - case AArch64::BI__builtin_neon_vmaxvq_f32: - case AArch64::BI__builtin_neon_vmaxvq_f64: - Int = Intrinsic::aarch64_neon_vmaxv; - AcrossVec = true; ExtendEle = false; s = "vmaxv"; break; - case AArch64::BI__builtin_neon_vminv_f32: - case AArch64::BI__builtin_neon_vminvq_f32: - case AArch64::BI__builtin_neon_vminvq_f64: - Int = Intrinsic::aarch64_neon_vminv; - AcrossVec = true; ExtendEle = false; s = "vminv"; break; - case AArch64::BI__builtin_neon_vmaxnmv_f32: - case AArch64::BI__builtin_neon_vmaxnmvq_f32: 
- case AArch64::BI__builtin_neon_vmaxnmvq_f64: - Int = Intrinsic::aarch64_neon_vmaxnmv; - AcrossVec = true; ExtendEle = false; s = "vmaxnmv"; break; - case AArch64::BI__builtin_neon_vminnmv_f32: - case AArch64::BI__builtin_neon_vminnmvq_f32: - case AArch64::BI__builtin_neon_vminnmvq_f64: - Int = Intrinsic::aarch64_neon_vminnmv; - AcrossVec = true; ExtendEle = false; s = "vminnmv"; break; - // Scalar Integer Saturating Doubling Multiply Half High - case AArch64::BI__builtin_neon_vqdmulhh_s16: - case AArch64::BI__builtin_neon_vqdmulhs_s32: - Int = Intrinsic::arm_neon_vqdmulh; - s = "vqdmulh"; OverloadInt = true; break; - // Scalar Integer Saturating Rounding Doubling Multiply Half High - case AArch64::BI__builtin_neon_vqrdmulhh_s16: - case AArch64::BI__builtin_neon_vqrdmulhs_s32: - Int = Intrinsic::arm_neon_vqrdmulh; - s = "vqrdmulh"; OverloadInt = true; break; - // Scalar Floating-point Reciprocal Step and - case AArch64::BI__builtin_neon_vrecpss_f32: - case AArch64::BI__builtin_neon_vrecpsd_f64: - Int = Intrinsic::arm_neon_vrecps; - s = "vrecps"; OverloadInt = true; break; - // Scalar Floating-point Reciprocal Square Root Step - case AArch64::BI__builtin_neon_vrsqrtss_f32: - case AArch64::BI__builtin_neon_vrsqrtsd_f64: - Int = Intrinsic::arm_neon_vrsqrts; - s = "vrsqrts"; OverloadInt = true; break; - // Scalar Signed Integer Convert To Floating-point - case AArch64::BI__builtin_neon_vcvts_f32_s32: - Int = Intrinsic::aarch64_neon_vcvtf32_s32, - s = "vcvtf"; OverloadInt = false; break; - case AArch64::BI__builtin_neon_vcvtd_f64_s64: - Int = Intrinsic::aarch64_neon_vcvtf64_s64, - s = "vcvtf"; OverloadInt = false; break; - // Scalar Unsigned Integer Convert To Floating-point - case AArch64::BI__builtin_neon_vcvts_f32_u32: - Int = Intrinsic::aarch64_neon_vcvtf32_u32, - s = "vcvtf"; OverloadInt = false; break; - case AArch64::BI__builtin_neon_vcvtd_f64_u64: - Int = Intrinsic::aarch64_neon_vcvtf64_u64, - s = "vcvtf"; OverloadInt = false; break; - // Scalar Floating-point Converts - case AArch64::BI__builtin_neon_vcvtxd_f32_f64: - Int = Intrinsic::aarch64_neon_fcvtxn; - s = "vcvtxn"; OverloadCvtInt = true; break; - case AArch64::BI__builtin_neon_vcvtas_s32_f32: - case AArch64::BI__builtin_neon_vcvtad_s64_f64: - Int = Intrinsic::aarch64_neon_fcvtas; - s = "vcvtas"; OverloadCvtInt = true; break; - case AArch64::BI__builtin_neon_vcvtas_u32_f32: - case AArch64::BI__builtin_neon_vcvtad_u64_f64: - Int = Intrinsic::aarch64_neon_fcvtau; - s = "vcvtau"; OverloadCvtInt = true; break; - case AArch64::BI__builtin_neon_vcvtms_s32_f32: - case AArch64::BI__builtin_neon_vcvtmd_s64_f64: - Int = Intrinsic::aarch64_neon_fcvtms; - s = "vcvtms"; OverloadCvtInt = true; break; - case AArch64::BI__builtin_neon_vcvtms_u32_f32: - case AArch64::BI__builtin_neon_vcvtmd_u64_f64: - Int = Intrinsic::aarch64_neon_fcvtmu; - s = "vcvtmu"; OverloadCvtInt = true; break; - case AArch64::BI__builtin_neon_vcvtns_s32_f32: - case AArch64::BI__builtin_neon_vcvtnd_s64_f64: - Int = Intrinsic::aarch64_neon_fcvtns; - s = "vcvtns"; OverloadCvtInt = true; break; - case AArch64::BI__builtin_neon_vcvtns_u32_f32: - case AArch64::BI__builtin_neon_vcvtnd_u64_f64: - Int = Intrinsic::aarch64_neon_fcvtnu; - s = "vcvtnu"; OverloadCvtInt = true; break; - case AArch64::BI__builtin_neon_vcvtps_s32_f32: - case AArch64::BI__builtin_neon_vcvtpd_s64_f64: - Int = Intrinsic::aarch64_neon_fcvtps; - s = "vcvtps"; OverloadCvtInt = true; break; - case AArch64::BI__builtin_neon_vcvtps_u32_f32: - case AArch64::BI__builtin_neon_vcvtpd_u64_f64: - Int = 
Intrinsic::aarch64_neon_fcvtpu; - s = "vcvtpu"; OverloadCvtInt = true; break; - case AArch64::BI__builtin_neon_vcvts_s32_f32: - case AArch64::BI__builtin_neon_vcvtd_s64_f64: - Int = Intrinsic::aarch64_neon_fcvtzs; - s = "vcvtzs"; OverloadCvtInt = true; break; - case AArch64::BI__builtin_neon_vcvts_u32_f32: - case AArch64::BI__builtin_neon_vcvtd_u64_f64: - Int = Intrinsic::aarch64_neon_fcvtzu; - s = "vcvtzu"; OverloadCvtInt = true; break; - // Scalar Floating-point Reciprocal Estimate - case AArch64::BI__builtin_neon_vrecpes_f32: - case AArch64::BI__builtin_neon_vrecped_f64: - Int = Intrinsic::arm_neon_vrecpe; - s = "vrecpe"; OverloadInt = true; break; - // Scalar Floating-point Reciprocal Exponent - case AArch64::BI__builtin_neon_vrecpxs_f32: - case AArch64::BI__builtin_neon_vrecpxd_f64: - Int = Intrinsic::aarch64_neon_vrecpx; - s = "vrecpx"; OverloadInt = true; break; - // Scalar Floating-point Reciprocal Square Root Estimate - case AArch64::BI__builtin_neon_vrsqrtes_f32: - case AArch64::BI__builtin_neon_vrsqrted_f64: - Int = Intrinsic::arm_neon_vrsqrte; - s = "vrsqrte"; OverloadInt = true; break; - // Scalar Compare Equal - case AArch64::BI__builtin_neon_vceqd_s64: - case AArch64::BI__builtin_neon_vceqd_u64: - Int = Intrinsic::aarch64_neon_vceq; s = "vceq"; - OverloadCmpInt = true; break; - // Scalar Compare Equal To Zero - case AArch64::BI__builtin_neon_vceqzd_s64: - case AArch64::BI__builtin_neon_vceqzd_u64: - Int = Intrinsic::aarch64_neon_vceq; s = "vceq"; - // Add implicit zero operand. - Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); - OverloadCmpInt = true; break; - // Scalar Compare Greater Than or Equal - case AArch64::BI__builtin_neon_vcged_s64: - Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; - OverloadCmpInt = true; break; - case AArch64::BI__builtin_neon_vcged_u64: - Int = Intrinsic::aarch64_neon_vchs; s = "vcge"; - OverloadCmpInt = true; break; - // Scalar Compare Greater Than or Equal To Zero - case AArch64::BI__builtin_neon_vcgezd_s64: - Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; - // Add implicit zero operand. - Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); - OverloadCmpInt = true; break; - // Scalar Compare Greater Than - case AArch64::BI__builtin_neon_vcgtd_s64: - Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; - OverloadCmpInt = true; break; - case AArch64::BI__builtin_neon_vcgtd_u64: - Int = Intrinsic::aarch64_neon_vchi; s = "vcgt"; - OverloadCmpInt = true; break; - // Scalar Compare Greater Than Zero - case AArch64::BI__builtin_neon_vcgtzd_s64: - Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; - // Add implicit zero operand. - Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); - OverloadCmpInt = true; break; - // Scalar Compare Less Than or Equal - case AArch64::BI__builtin_neon_vcled_s64: - Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; - OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; - case AArch64::BI__builtin_neon_vcled_u64: - Int = Intrinsic::aarch64_neon_vchs; s = "vchs"; - OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; - // Scalar Compare Less Than or Equal To Zero - case AArch64::BI__builtin_neon_vclezd_s64: - Int = Intrinsic::aarch64_neon_vclez; s = "vcle"; - // Add implicit zero operand. 
- Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); - OverloadCmpInt = true; break; - // Scalar Compare Less Than - case AArch64::BI__builtin_neon_vcltd_s64: - Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; - OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; - case AArch64::BI__builtin_neon_vcltd_u64: - Int = Intrinsic::aarch64_neon_vchi; s = "vchi"; - OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; - // Scalar Compare Less Than Zero - case AArch64::BI__builtin_neon_vcltzd_s64: - Int = Intrinsic::aarch64_neon_vcltz; s = "vclt"; - // Add implicit zero operand. - Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); - OverloadCmpInt = true; break; - // Scalar Floating-point Compare Equal - case AArch64::BI__builtin_neon_vceqs_f32: - case AArch64::BI__builtin_neon_vceqd_f64: - Int = Intrinsic::aarch64_neon_vceq; s = "vceq"; - OverloadCmpInt = true; break; - // Scalar Floating-point Compare Equal To Zero - case AArch64::BI__builtin_neon_vceqzs_f32: - case AArch64::BI__builtin_neon_vceqzd_f64: - Int = Intrinsic::aarch64_neon_vceq; s = "vceq"; - // Add implicit zero operand. - Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy)); - IsFpCmpZInt = true; - OverloadCmpInt = true; break; - // Scalar Floating-point Compare Greater Than Or Equal - case AArch64::BI__builtin_neon_vcges_f32: - case AArch64::BI__builtin_neon_vcged_f64: - Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; - OverloadCmpInt = true; break; - // Scalar Floating-point Compare Greater Than Or Equal To Zero - case AArch64::BI__builtin_neon_vcgezs_f32: - case AArch64::BI__builtin_neon_vcgezd_f64: - Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; - // Add implicit zero operand. - Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy)); - IsFpCmpZInt = true; - OverloadCmpInt = true; break; - // Scalar Floating-point Compare Greather Than - case AArch64::BI__builtin_neon_vcgts_f32: - case AArch64::BI__builtin_neon_vcgtd_f64: - Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; - OverloadCmpInt = true; break; - // Scalar Floating-point Compare Greather Than Zero - case AArch64::BI__builtin_neon_vcgtzs_f32: - case AArch64::BI__builtin_neon_vcgtzd_f64: - Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; - // Add implicit zero operand. - Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy)); - IsFpCmpZInt = true; - OverloadCmpInt = true; break; - // Scalar Floating-point Compare Less Than or Equal - case AArch64::BI__builtin_neon_vcles_f32: - case AArch64::BI__builtin_neon_vcled_f64: - Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; - OverloadCmpInt = true; break; - // Scalar Floating-point Compare Less Than Or Equal To Zero - case AArch64::BI__builtin_neon_vclezs_f32: - case AArch64::BI__builtin_neon_vclezd_f64: - Int = Intrinsic::aarch64_neon_vclez; s = "vcle"; - // Add implicit zero operand. - Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy)); - IsFpCmpZInt = true; - OverloadCmpInt = true; break; - // Scalar Floating-point Compare Less Than Zero - case AArch64::BI__builtin_neon_vclts_f32: - case AArch64::BI__builtin_neon_vcltd_f64: - Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; - OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; - // Scalar Floating-point Compare Less Than Zero - case AArch64::BI__builtin_neon_vcltzs_f32: - case AArch64::BI__builtin_neon_vcltzd_f64: - Int = Intrinsic::aarch64_neon_vcltz; s = "vclt"; - // Add implicit zero operand. 
- Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy)); - IsFpCmpZInt = true; - OverloadCmpInt = true; break; - // Scalar Floating-point Absolute Compare Greater Than Or Equal - case AArch64::BI__builtin_neon_vcages_f32: - case AArch64::BI__builtin_neon_vcaged_f64: - Int = Intrinsic::aarch64_neon_vcage; s = "vcage"; - OverloadCmpInt = true; break; - // Scalar Floating-point Absolute Compare Greater Than - case AArch64::BI__builtin_neon_vcagts_f32: - case AArch64::BI__builtin_neon_vcagtd_f64: - Int = Intrinsic::aarch64_neon_vcagt; s = "vcagt"; - OverloadCmpInt = true; break; - // Scalar Floating-point Absolute Compare Less Than Or Equal - case AArch64::BI__builtin_neon_vcales_f32: - case AArch64::BI__builtin_neon_vcaled_f64: - Int = Intrinsic::aarch64_neon_vcage; s = "vcage"; - OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; - // Scalar Floating-point Absolute Compare Less Than - case AArch64::BI__builtin_neon_vcalts_f32: - case AArch64::BI__builtin_neon_vcaltd_f64: - Int = Intrinsic::aarch64_neon_vcagt; s = "vcalt"; - OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; - // Scalar Compare Bitwise Test Bits - case AArch64::BI__builtin_neon_vtstd_s64: - case AArch64::BI__builtin_neon_vtstd_u64: - Int = Intrinsic::aarch64_neon_vtstd; s = "vtst"; - OverloadCmpInt = true; break; - // Scalar Absolute Value - case AArch64::BI__builtin_neon_vabsd_s64: - Int = Intrinsic::aarch64_neon_vabs; - s = "vabs"; OverloadInt = false; break; - // Scalar Absolute Difference - case AArch64::BI__builtin_neon_vabds_f32: - case AArch64::BI__builtin_neon_vabdd_f64: - Int = Intrinsic::aarch64_neon_vabd; - s = "vabd"; OverloadInt = true; break; - // Scalar Signed Saturating Absolute Value - case AArch64::BI__builtin_neon_vqabsb_s8: - case AArch64::BI__builtin_neon_vqabsh_s16: - case AArch64::BI__builtin_neon_vqabss_s32: - case AArch64::BI__builtin_neon_vqabsd_s64: - Int = Intrinsic::arm_neon_vqabs; - s = "vqabs"; OverloadInt = true; break; - // Scalar Negate - case AArch64::BI__builtin_neon_vnegd_s64: - Int = Intrinsic::aarch64_neon_vneg; - s = "vneg"; OverloadInt = false; break; - // Scalar Signed Saturating Negate - case AArch64::BI__builtin_neon_vqnegb_s8: - case AArch64::BI__builtin_neon_vqnegh_s16: - case AArch64::BI__builtin_neon_vqnegs_s32: - case AArch64::BI__builtin_neon_vqnegd_s64: - Int = Intrinsic::arm_neon_vqneg; - s = "vqneg"; OverloadInt = true; break; - // Scalar Signed Saturating Accumulated of Unsigned Value - case AArch64::BI__builtin_neon_vuqaddb_s8: - case AArch64::BI__builtin_neon_vuqaddh_s16: - case AArch64::BI__builtin_neon_vuqadds_s32: - case AArch64::BI__builtin_neon_vuqaddd_s64: - Int = Intrinsic::aarch64_neon_vuqadd; - s = "vuqadd"; OverloadInt = true; break; - // Scalar Unsigned Saturating Accumulated of Signed Value - case AArch64::BI__builtin_neon_vsqaddb_u8: - case AArch64::BI__builtin_neon_vsqaddh_u16: - case AArch64::BI__builtin_neon_vsqadds_u32: - case AArch64::BI__builtin_neon_vsqaddd_u64: - Int = Intrinsic::aarch64_neon_vsqadd; - s = "vsqadd"; OverloadInt = true; break; - // Signed Saturating Doubling Multiply-Add Long - case AArch64::BI__builtin_neon_vqdmlalh_s16: - case AArch64::BI__builtin_neon_vqdmlals_s32: - Int = Intrinsic::aarch64_neon_vqdmlal; - s = "vqdmlal"; OverloadWideInt = true; break; - // Signed Saturating Doubling Multiply-Subtract Long - case AArch64::BI__builtin_neon_vqdmlslh_s16: - case AArch64::BI__builtin_neon_vqdmlsls_s32: - Int = Intrinsic::aarch64_neon_vqdmlsl; - s = "vqdmlsl"; OverloadWideInt = true; break; - // Signed Saturating Doubling 
Multiply Long - case AArch64::BI__builtin_neon_vqdmullh_s16: - case AArch64::BI__builtin_neon_vqdmulls_s32: - Int = Intrinsic::arm_neon_vqdmull; - s = "vqdmull"; OverloadWideInt = true; break; - // Scalar Signed Saturating Extract Unsigned Narrow - case AArch64::BI__builtin_neon_vqmovunh_s16: - case AArch64::BI__builtin_neon_vqmovuns_s32: - case AArch64::BI__builtin_neon_vqmovund_s64: - Int = Intrinsic::arm_neon_vqmovnsu; - s = "vqmovun"; OverloadNarrowInt = true; break; - // Scalar Signed Saturating Extract Narrow - case AArch64::BI__builtin_neon_vqmovnh_s16: - case AArch64::BI__builtin_neon_vqmovns_s32: - case AArch64::BI__builtin_neon_vqmovnd_s64: - Int = Intrinsic::arm_neon_vqmovns; - s = "vqmovn"; OverloadNarrowInt = true; break; - // Scalar Unsigned Saturating Extract Narrow - case AArch64::BI__builtin_neon_vqmovnh_u16: - case AArch64::BI__builtin_neon_vqmovns_u32: - case AArch64::BI__builtin_neon_vqmovnd_u64: - Int = Intrinsic::arm_neon_vqmovnu; - s = "vqmovn"; OverloadNarrowInt = true; break; - // Scalar Signed Shift Right (Immediate) - case AArch64::BI__builtin_neon_vshrd_n_s64: - Int = Intrinsic::aarch64_neon_vshrds_n; - s = "vsshr"; OverloadInt = false; break; - // Scalar Unsigned Shift Right (Immediate) - case AArch64::BI__builtin_neon_vshrd_n_u64: - Int = Intrinsic::aarch64_neon_vshrdu_n; - s = "vushr"; OverloadInt = false; break; - // Scalar Signed Rounding Shift Right (Immediate) - case AArch64::BI__builtin_neon_vrshrd_n_s64: - Int = Intrinsic::aarch64_neon_vsrshr; - s = "vsrshr"; OverloadInt = true; break; - // Scalar Unsigned Rounding Shift Right (Immediate) - case AArch64::BI__builtin_neon_vrshrd_n_u64: - Int = Intrinsic::aarch64_neon_vurshr; - s = "vurshr"; OverloadInt = true; break; - // Scalar Signed Shift Right and Accumulate (Immediate) - case AArch64::BI__builtin_neon_vsrad_n_s64: - Int = Intrinsic::aarch64_neon_vsrads_n; - s = "vssra"; OverloadInt = false; break; - // Scalar Unsigned Shift Right and Accumulate (Immediate) - case AArch64::BI__builtin_neon_vsrad_n_u64: - Int = Intrinsic::aarch64_neon_vsradu_n; - s = "vusra"; OverloadInt = false; break; - // Scalar Signed Rounding Shift Right and Accumulate (Immediate) - case AArch64::BI__builtin_neon_vrsrad_n_s64: - Int = Intrinsic::aarch64_neon_vrsrads_n; - s = "vsrsra"; OverloadInt = false; break; - // Scalar Unsigned Rounding Shift Right and Accumulate (Immediate) - case AArch64::BI__builtin_neon_vrsrad_n_u64: - Int = Intrinsic::aarch64_neon_vrsradu_n; - s = "vursra"; OverloadInt = false; break; - // Scalar Signed/Unsigned Shift Left (Immediate) - case AArch64::BI__builtin_neon_vshld_n_s64: - case AArch64::BI__builtin_neon_vshld_n_u64: - Int = Intrinsic::aarch64_neon_vshld_n; - s = "vshl"; OverloadInt = false; break; - // Signed Saturating Shift Left (Immediate) - case AArch64::BI__builtin_neon_vqshlb_n_s8: - case AArch64::BI__builtin_neon_vqshlh_n_s16: - case AArch64::BI__builtin_neon_vqshls_n_s32: - case AArch64::BI__builtin_neon_vqshld_n_s64: - Int = Intrinsic::aarch64_neon_vqshls_n; - s = "vsqshl"; OverloadInt = true; break; - // Unsigned Saturating Shift Left (Immediate) - case AArch64::BI__builtin_neon_vqshlb_n_u8: - case AArch64::BI__builtin_neon_vqshlh_n_u16: - case AArch64::BI__builtin_neon_vqshls_n_u32: - case AArch64::BI__builtin_neon_vqshld_n_u64: - Int = Intrinsic::aarch64_neon_vqshlu_n; - s = "vuqshl"; OverloadInt = true; break; - // Signed Saturating Shift Left Unsigned (Immediate) - case AArch64::BI__builtin_neon_vqshlub_n_s8: - case AArch64::BI__builtin_neon_vqshluh_n_s16: - case 
AArch64::BI__builtin_neon_vqshlus_n_s32: - case AArch64::BI__builtin_neon_vqshlud_n_s64: - Int = Intrinsic::aarch64_neon_vsqshlu; - s = "vsqshlu"; OverloadInt = true; break; - // Shift Right And Insert (Immediate) - case AArch64::BI__builtin_neon_vsrid_n_s64: - case AArch64::BI__builtin_neon_vsrid_n_u64: - Int = Intrinsic::aarch64_neon_vsri; - s = "vsri"; OverloadInt = true; break; - // Shift Left And Insert (Immediate) - case AArch64::BI__builtin_neon_vslid_n_s64: - case AArch64::BI__builtin_neon_vslid_n_u64: - Int = Intrinsic::aarch64_neon_vsli; - s = "vsli"; OverloadInt = true; break; - // Signed Saturating Shift Right Narrow (Immediate) - case AArch64::BI__builtin_neon_vqshrnh_n_s16: - case AArch64::BI__builtin_neon_vqshrns_n_s32: - case AArch64::BI__builtin_neon_vqshrnd_n_s64: - Int = Intrinsic::aarch64_neon_vsqshrn; - s = "vsqshrn"; OverloadInt = true; break; - // Unsigned Saturating Shift Right Narrow (Immediate) - case AArch64::BI__builtin_neon_vqshrnh_n_u16: - case AArch64::BI__builtin_neon_vqshrns_n_u32: - case AArch64::BI__builtin_neon_vqshrnd_n_u64: - Int = Intrinsic::aarch64_neon_vuqshrn; - s = "vuqshrn"; OverloadInt = true; break; - // Signed Saturating Rounded Shift Right Narrow (Immediate) - case AArch64::BI__builtin_neon_vqrshrnh_n_s16: - case AArch64::BI__builtin_neon_vqrshrns_n_s32: - case AArch64::BI__builtin_neon_vqrshrnd_n_s64: - Int = Intrinsic::aarch64_neon_vsqrshrn; - s = "vsqrshrn"; OverloadInt = true; break; - // Unsigned Saturating Rounded Shift Right Narrow (Immediate) - case AArch64::BI__builtin_neon_vqrshrnh_n_u16: - case AArch64::BI__builtin_neon_vqrshrns_n_u32: - case AArch64::BI__builtin_neon_vqrshrnd_n_u64: - Int = Intrinsic::aarch64_neon_vuqrshrn; - s = "vuqrshrn"; OverloadInt = true; break; - // Signed Saturating Shift Right Unsigned Narrow (Immediate) - case AArch64::BI__builtin_neon_vqshrunh_n_s16: - case AArch64::BI__builtin_neon_vqshruns_n_s32: - case AArch64::BI__builtin_neon_vqshrund_n_s64: - Int = Intrinsic::aarch64_neon_vsqshrun; - s = "vsqshrun"; OverloadInt = true; break; - // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate) - case AArch64::BI__builtin_neon_vqrshrunh_n_s16: - case AArch64::BI__builtin_neon_vqrshruns_n_s32: - case AArch64::BI__builtin_neon_vqrshrund_n_s64: - Int = Intrinsic::aarch64_neon_vsqrshrun; - s = "vsqrshrun"; OverloadInt = true; break; - // Scalar Signed Fixed-point Convert To Floating-Point (Immediate) - case AArch64::BI__builtin_neon_vcvts_n_f32_s32: - Int = Intrinsic::aarch64_neon_vcvtf32_n_s32; - s = "vcvtf"; OverloadInt = false; break; - case AArch64::BI__builtin_neon_vcvtd_n_f64_s64: - Int = Intrinsic::aarch64_neon_vcvtf64_n_s64; - s = "vcvtf"; OverloadInt = false; break; - // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate) - case AArch64::BI__builtin_neon_vcvts_n_f32_u32: - Int = Intrinsic::aarch64_neon_vcvtf32_n_u32; - s = "vcvtf"; OverloadInt = false; break; - case AArch64::BI__builtin_neon_vcvtd_n_f64_u64: - Int = Intrinsic::aarch64_neon_vcvtf64_n_u64; - s = "vcvtf"; OverloadInt = false; break; - // Scalar Floating-point Convert To Signed Fixed-point (Immediate) - case AArch64::BI__builtin_neon_vcvts_n_s32_f32: - Int = Intrinsic::aarch64_neon_vcvts_n_s32_f32; - s = "fcvtzs"; OverloadInt = false; break; - case AArch64::BI__builtin_neon_vcvtd_n_s64_f64: - Int = Intrinsic::aarch64_neon_vcvtd_n_s64_f64; - s = "fcvtzs"; OverloadInt = false; break; - // Scalar Floating-point Convert To Unsigned Fixed-point (Immediate) - case AArch64::BI__builtin_neon_vcvts_n_u32_f32: - Int = 
Intrinsic::aarch64_neon_vcvts_n_u32_f32; - s = "fcvtzu"; OverloadInt = false; break; - case AArch64::BI__builtin_neon_vcvtd_n_u64_f64: - Int = Intrinsic::aarch64_neon_vcvtd_n_u64_f64; - s = "fcvtzu"; OverloadInt = false; break; - } - - if (!Int) - return 0; - - // AArch64 scalar builtin that returns scalar type - // and should be mapped to AArch64 intrinsic that returns - // one-element vector type. - Function *F = 0; - if (AcrossVec) { - // Gen arg type - const Expr *Arg = E->getArg(E->getNumArgs()-1); - llvm::Type *Ty = CGF.ConvertType(Arg->getType()); - llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); - llvm::Type *ETy = VTy->getElementType(); - llvm::VectorType *RTy = llvm::VectorType::get(ETy, 1); - - if (ExtendEle) { - assert(!ETy->isFloatingPointTy()); - RTy = llvm::VectorType::getExtendedElementVectorType(RTy); - } - - llvm::Type *Tys[2] = {RTy, VTy}; - F = CGF.CGM.getIntrinsic(Int, Tys); - assert(E->getNumArgs() == 1); - } else if (OverloadInt) { - // Determine the type of this overloaded AArch64 intrinsic - llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); - llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1); - assert(VTy); - - F = CGF.CGM.getIntrinsic(Int, VTy); - } else if (OverloadWideInt || OverloadNarrowInt) { - // Determine the type of this overloaded AArch64 intrinsic - const Expr *Arg = E->getArg(E->getNumArgs()-1); - llvm::Type *Ty = CGF.ConvertType(Arg->getType()); - llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1); - llvm::VectorType *RTy = OverloadWideInt ? - llvm::VectorType::getExtendedElementVectorType(VTy) : - llvm::VectorType::getTruncatedElementVectorType(VTy); - F = CGF.CGM.getIntrinsic(Int, RTy); - } else if (OverloadCmpInt) { - // Determine the types of this overloaded AArch64 intrinsic - SmallVector<llvm::Type *, 3> Tys; - const Expr *Arg = E->getArg(E->getNumArgs()-1); - llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); - llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1); - Tys.push_back(VTy); - Ty = CGF.ConvertType(Arg->getType()); - VTy = llvm::VectorType::get(Ty, 1); - Tys.push_back(VTy); - if(IsFpCmpZInt) - VTy = llvm::VectorType::get(CGF.FloatTy, 1); - Tys.push_back(VTy); - - F = CGF.CGM.getIntrinsic(Int, Tys); - } else if (OverloadCvtInt) { - // Determine the types of this overloaded AArch64 intrinsic - SmallVector<llvm::Type *, 2> Tys; - const Expr *Arg = E->getArg(E->getNumArgs()-1); - llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); - llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1); - Tys.push_back(VTy); - Ty = CGF.ConvertType(Arg->getType()); - VTy = llvm::VectorType::get(Ty, 1); - Tys.push_back(VTy); - - F = CGF.CGM.getIntrinsic(Int, Tys); - } else - F = CGF.CGM.getIntrinsic(Int); - - Value *Result = CGF.EmitNeonCall(F, Ops, s); - llvm::Type *ResultType = CGF.ConvertType(E->getType()); - // AArch64 intrinsic one-element vector type cast to - // scalar type expected by the builtin - return CGF.Builder.CreateBitCast(Result, ResultType, s); +enum { + AddRetType = (1 << 0), + Add1ArgType = (1 << 1), + Add2ArgTypes = (1 << 2), + + VectorizeRetType = (1 << 3), + VectorizeArgTypes = (1 << 4), + + InventFloatType = (1 << 5), + UnsignedAlts = (1 << 6), + + Use64BitVectors = (1 << 7), + Use128BitVectors = (1 << 8), + + Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, + VectorRet = AddRetType | VectorizeRetType, + VectorRetGetArgs01 = + AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, + FpCmpzModifiers = + AddRetType | VectorizeRetType | Add1ArgType | InventFloatType +}; + + struct 
NeonIntrinsicInfo { + unsigned BuiltinID; + unsigned LLVMIntrinsic; + unsigned AltLLVMIntrinsic; + const char *NameHint; + unsigned TypeModifier; + + bool operator<(unsigned RHSBuiltinID) const { + return BuiltinID < RHSBuiltinID; + } +}; + +#define NEONMAP0(NameBase) \ + { NEON::BI__builtin_neon_ ## NameBase, 0, 0, #NameBase, 0 } + +#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ + { NEON:: BI__builtin_neon_ ## NameBase, \ + Intrinsic::LLVMIntrinsic, 0, #NameBase, TypeModifier } + +#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \ + { NEON:: BI__builtin_neon_ ## NameBase, \ + Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ + #NameBase, TypeModifier } + +static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { + NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), + NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), + NEONMAP1(vabs_v, arm_neon_vabs, 0), + NEONMAP1(vabsq_v, arm_neon_vabs, 0), + NEONMAP0(vaddhn_v), + NEONMAP1(vaesdq_v, arm_neon_aesd, 0), + NEONMAP1(vaeseq_v, arm_neon_aese, 0), + NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), + NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0), + NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), + NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), + NEONMAP1(vcage_v, arm_neon_vacge, 0), + NEONMAP1(vcageq_v, arm_neon_vacge, 0), + NEONMAP1(vcagt_v, arm_neon_vacgt, 0), + NEONMAP1(vcagtq_v, arm_neon_vacgt, 0), + NEONMAP1(vcale_v, arm_neon_vacge, 0), + NEONMAP1(vcaleq_v, arm_neon_vacge, 0), + NEONMAP1(vcalt_v, arm_neon_vacgt, 0), + NEONMAP1(vcaltq_v, arm_neon_vacgt, 0), + NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType), + NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType), + NEONMAP1(vclz_v, ctlz, Add1ArgType), + NEONMAP1(vclzq_v, ctlz, Add1ArgType), + NEONMAP1(vcnt_v, ctpop, Add1ArgType), + NEONMAP1(vcntq_v, ctpop, Add1ArgType), + NEONMAP1(vcvt_f16_v, arm_neon_vcvtfp2hf, 0), + NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0), + NEONMAP0(vcvt_f32_v), + NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0), + NEONMAP0(vcvt_s32_v), + NEONMAP0(vcvt_s64_v), + NEONMAP0(vcvt_u32_v), + NEONMAP0(vcvt_u64_v), + NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), + NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), + NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), + NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0), + NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0), + NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0), + NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0), + NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0), + NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0), + NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0), + NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0), + NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0), + NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0), + NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0), + NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0), + NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0), + NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0), + NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0), + NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0), + NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0), + NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0), + NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0), + NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0), + NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0), + NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0), + NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0), + NEONMAP1(vcvtp_u32_v, 
arm_neon_vcvtpu, 0), + NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0), + NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0), + NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0), + NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0), + NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0), + NEONMAP0(vcvtq_f32_v), + NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0), + NEONMAP0(vcvtq_s32_v), + NEONMAP0(vcvtq_s64_v), + NEONMAP0(vcvtq_u32_v), + NEONMAP0(vcvtq_u64_v), + NEONMAP0(vext_v), + NEONMAP0(vextq_v), + NEONMAP0(vfma_v), + NEONMAP0(vfmaq_v), + NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), + NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), + NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), + NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), + NEONMAP0(vld1_dup_v), + NEONMAP1(vld1_v, arm_neon_vld1, 0), + NEONMAP0(vld1q_dup_v), + NEONMAP1(vld1q_v, arm_neon_vld1, 0), + NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0), + NEONMAP1(vld2_v, arm_neon_vld2, 0), + NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0), + NEONMAP1(vld2q_v, arm_neon_vld2, 0), + NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0), + NEONMAP1(vld3_v, arm_neon_vld3, 0), + NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0), + NEONMAP1(vld3q_v, arm_neon_vld3, 0), + NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0), + NEONMAP1(vld4_v, arm_neon_vld4, 0), + NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0), + NEONMAP1(vld4q_v, arm_neon_vld4, 0), + NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), + NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), + NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), + NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), + NEONMAP0(vmovl_v), + NEONMAP0(vmovn_v), + NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType), + NEONMAP0(vmull_v), + NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType), + NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), + NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), + NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType), + NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), + NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), + NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType), + NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts), + NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts), + NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType), + NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType), + NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), + NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), + NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0), + NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0), + NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType), + NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType), + NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType), + NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts), + NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType), + NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType), + NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType), + 
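For orientation, a hedged illustration (not introduced by this change): each NEONMAP2 row expands, via the macros defined just above, into one NeonIntrinsicInfo record in the order BuiltinID, LLVMIntrinsic, AltLLVMIntrinsic, NameHint, TypeModifier. The vhadd_v row, for example, becomes roughly:

  static const NeonIntrinsicInfo ExpandedVhaddEntry = {  // name is illustrative only
    NEON::BI__builtin_neon_vhadd_v,   // BuiltinID: the sorted lookup key
    Intrinsic::arm_neon_vhaddu,       // LLVMIntrinsic: unsigned halving add
    Intrinsic::arm_neon_vhadds,       // AltLLVMIntrinsic: signed halving add
    "vhadd_v",                        // NameHint for the emitted call
    Add1ArgType | UnsignedAlts        // TypeModifier flags
  };

When UnsignedAlts is set, the common emission path added below starts from the unsigned intrinsic and switches to AltLLVMIntrinsic for signed element types, so one table row covers both the _s and _u forms of the builtin.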
NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType), + NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType), + NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), + NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), + NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), + NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), + NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), + NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), + NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), + NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), + NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType), + NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), + NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), + NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType), + NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType), + NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), + NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), + NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), + NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), + NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), + NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), + NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType), + NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType), + NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType), + NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0), + NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0), + NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0), + NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0), + NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0), + NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0), + NEONMAP0(vshl_n_v), + NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), + NEONMAP0(vshll_n_v), + NEONMAP0(vshlq_n_v), + NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), + NEONMAP0(vshr_n_v), + NEONMAP0(vshrn_n_v), + NEONMAP0(vshrq_n_v), + NEONMAP1(vst1_v, arm_neon_vst1, 0), + NEONMAP1(vst1q_v, arm_neon_vst1, 0), + NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0), + NEONMAP1(vst2_v, arm_neon_vst2, 0), + NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0), + NEONMAP1(vst2q_v, arm_neon_vst2, 0), + NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0), + NEONMAP1(vst3_v, arm_neon_vst3, 0), + NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0), + NEONMAP1(vst3q_v, arm_neon_vst3, 0), + NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0), + NEONMAP1(vst4_v, arm_neon_vst4, 0), + NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0), + NEONMAP1(vst4q_v, arm_neon_vst4, 0), + NEONMAP0(vsubhn_v), + NEONMAP0(vtrn_v), + NEONMAP0(vtrnq_v), + NEONMAP0(vtst_v), + NEONMAP0(vtstq_v), + NEONMAP0(vuzp_v), + NEONMAP0(vuzpq_v), + NEONMAP0(vzip_v), + NEONMAP0(vzipq_v) +}; + +static NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { + NEONMAP1(vabs_v, aarch64_neon_abs, 0), + NEONMAP1(vabsq_v, aarch64_neon_abs, 0), + NEONMAP0(vaddhn_v), + NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0), + NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), + NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), + NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0), + NEONMAP1(vcage_v, aarch64_neon_facge, 0), + NEONMAP1(vcageq_v, aarch64_neon_facge, 0), + NEONMAP1(vcagt_v, aarch64_neon_facgt, 
0), + NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0), + NEONMAP1(vcale_v, aarch64_neon_facge, 0), + NEONMAP1(vcaleq_v, aarch64_neon_facge, 0), + NEONMAP1(vcalt_v, aarch64_neon_facgt, 0), + NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0), + NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType), + NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType), + NEONMAP1(vclz_v, ctlz, Add1ArgType), + NEONMAP1(vclzq_v, ctlz, Add1ArgType), + NEONMAP1(vcnt_v, ctpop, Add1ArgType), + NEONMAP1(vcntq_v, ctpop, Add1ArgType), + NEONMAP1(vcvt_f16_v, aarch64_neon_vcvtfp2hf, 0), + NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0), + NEONMAP0(vcvt_f32_v), + NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP0(vcvtq_f32_v), + NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), + NEONMAP0(vext_v), + NEONMAP0(vextq_v), + NEONMAP0(vfma_v), + NEONMAP0(vfmaq_v), + NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), + NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), + NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), + NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), + NEONMAP0(vmovl_v), + NEONMAP0(vmovn_v), + NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType), + NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType), + NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType), + NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), + NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), + NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType), + NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType), + NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType), + NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), + NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), + NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0), + NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0), + NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType), + NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType), + NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType), + NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts), + NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType), + NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType), + NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType), + NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType), + NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType), + NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), + NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), + NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, 
UnsignedAlts), + NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), + NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts), + NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), + NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), + NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), + NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType), + NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), + NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), + NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType), + NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType), + NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), + NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), + NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), + NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), + NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), + NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), + NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType), + NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType), + NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType), + NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0), + NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0), + NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0), + NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0), + NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0), + NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0), + NEONMAP0(vshl_n_v), + NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), + NEONMAP0(vshll_n_v), + NEONMAP0(vshlq_n_v), + NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), + NEONMAP0(vshr_n_v), + NEONMAP0(vshrn_n_v), + NEONMAP0(vshrq_n_v), + NEONMAP0(vsubhn_v), + NEONMAP0(vtst_v), + NEONMAP0(vtstq_v), +}; + +static NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = { + NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType), + NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType), + NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType), + NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), + NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), + NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), + NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), + NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), + NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), + NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), + NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), + NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType), + NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), + NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType), + NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), + NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), + NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType), + NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType), + NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), + NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), + 
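As a sketch only, assuming the flag meanings defined at the top of this hunk: for a scalar row such as vcagts_f32 above, AddRetType | Add1ArgType tells the LookupNeonLLVMIntrinsic helper added further down to instantiate the overloaded LLVM intrinsic on the builtin's return type and first argument type. Written out by hand from inside CodeGenFunction it would look roughly like:

  // uint32_t vcagts_f32(float32_t a, float32_t b)
  llvm::Type *RetTy = ConvertType(E->getCallReturnType());   // i32   (AddRetType)
  llvm::Type *ArgTy = ConvertType(E->getArg(0)->getType());  // float (Add1ArgType)
  llvm::Type *Tys[] = { RetTy, ArgTy };
  // Resolves the overloaded facge intrinsic on {i32, float}.
  llvm::Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_facge, Tys);

The real path builds Tys inside LookupNeonLLVMIntrinsic rather than by hand; this is only meant to show what the two flags contribute.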
NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType), + NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType), + NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), + NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), + NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType), + NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType), + NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType), + NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), + NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), + NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), + NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), + NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), + NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType), + NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), + NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType), + NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), + NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType), + NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), + NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType), + NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), + NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), + NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), + NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), + NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), + NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), + NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), + NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), + NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), + NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), + NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), + NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), + NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), + NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), + NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), + NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), + NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), + NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), + NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), + NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), + NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), + NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), + NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), + NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), + NEONMAP1(vminvq_u32, 
aarch64_neon_uminv, AddRetType | Add1ArgType), + NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0), + NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType), + NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType), + NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType), + NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), + NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), + NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), + NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), + NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), + NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), + NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), + NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), + NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), + NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType), + NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType), + NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType), + NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType), + NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType), + NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType), + NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType), + NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors), + NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0), + NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType), + NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType), + NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), + NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), + NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), + NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), + NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType), + NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), + NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), + NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType), + NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType), + NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType), + NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType), + NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType), + NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqrshlh_u16, 
aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType), + NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType), + NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType), + NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType), + NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType), + NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), + NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), + NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType), + NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType), + NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType), + NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType), + NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType), + NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType), + NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType), + NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType), + NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType), + NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType), + NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), + NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), + NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType), + NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType), + NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType), + NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType), + NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType), + NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType), + NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType), + NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType), + NEONMAP1(vrshld_s64, aarch64_neon_srshl, 
Add1ArgType), + NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType), + NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType), + NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType), + NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType), + NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType), + NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0), + NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0), + NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0), + NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0), + NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType), + NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType), + NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType), + NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType), + NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType), + NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType), + NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType), + NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType), + NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType), + NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), +}; + +#undef NEONMAP0 +#undef NEONMAP1 +#undef NEONMAP2 + +static bool NEONSIMDIntrinsicsProvenSorted = false; + +static bool AArch64SIMDIntrinsicsProvenSorted = false; +static bool AArch64SISDIntrinsicsProvenSorted = false; + + +static const NeonIntrinsicInfo * +findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap, + unsigned BuiltinID, bool &MapProvenSorted) { + +#ifndef NDEBUG + if (!MapProvenSorted) { + // FIXME: use std::is_sorted once C++11 is allowed + for (unsigned i = 0; i < IntrinsicMap.size() - 1; ++i) + assert(IntrinsicMap[i].BuiltinID <= IntrinsicMap[i + 1].BuiltinID); + MapProvenSorted = true; + } +#endif + + const NeonIntrinsicInfo *Builtin = + std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID); + + if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) + return Builtin; + + return nullptr; } -Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( - Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, - const CmpInst::Predicate Ip, const Twine &Name) { - llvm::Type *OTy = ((llvm::User *)Op)->getOperand(0)->getType(); - if (OTy->isPointerTy()) - OTy = Ty; - Op = Builder.CreateBitCast(Op, OTy); - if (((llvm::VectorType *)OTy)->getElementType()->isFloatingPointTy()) { - Op = Builder.CreateFCmp(Fp, Op, ConstantAggregateZero::get(OTy)); - } else { - Op = Builder.CreateICmp(Ip, Op, ConstantAggregateZero::get(OTy)); - } - return Builder.CreateZExt(Op, Ty, Name); -} +Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, + unsigned Modifier, + llvm::Type *ArgType, + const CallExpr *E) { + int VectorSize = 0; + if (Modifier & Use64BitVectors) + VectorSize = 64; + else if (Modifier & Use128BitVectors) + VectorSize = 128; -static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, - Value *ExtOp, Value *IndexOp, - llvm::Type *ResTy, unsigned IntID, - const char *Name) { - SmallVector<Value *, 2> TblOps; - if (ExtOp) - TblOps.push_back(ExtOp); + // Return type. 
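A minimal sketch of the intended lookup pattern, assuming it sits in a CodeGenFunction member such as the target's builtin-emission hook (the actual call sites appear later in this file), dispatching through the SISD emission helper defined just below:

  // The tables above must stay sorted by BuiltinID: findNeonIntrinsicInMap does a
  // std::lower_bound and only checks sortedness when assertions are enabled.
  if (const NeonIntrinsicInfo *Builtin =
          findNeonIntrinsicInMap(AArch64SISDIntrinsicMap, BuiltinID,
                                 AArch64SISDIntrinsicsProvenSorted))
    return EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);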
+ SmallVector<llvm::Type *, 3> Tys; + if (Modifier & AddRetType) { + llvm::Type *Ty = ConvertType(E->getCallReturnType()); + if (Modifier & VectorizeRetType) + Ty = llvm::VectorType::get( + Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1); - // Build a vector containing sequential number like (0, 1, 2, ..., 15) - SmallVector<Constant*, 16> Indices; - llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); - for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { - Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i)); - Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1)); + Tys.push_back(Ty); } - Value *SV = llvm::ConstantVector::get(Indices); - int PairPos = 0, End = Ops.size() - 1; - while (PairPos < End) { - TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], - Ops[PairPos+1], SV, Name)); - PairPos += 2; + // Arguments. + if (Modifier & VectorizeArgTypes) { + int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1; + ArgType = llvm::VectorType::get(ArgType, Elts); } - // If there's an odd number of 64-bit lookup table, fill the high 64-bit - // of the 128-bit lookup table with zero. - if (PairPos == End) { - Value *ZeroTbl = ConstantAggregateZero::get(TblTy); - TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], - ZeroTbl, SV, Name)); - } + if (Modifier & (Add1ArgType | Add2ArgTypes)) + Tys.push_back(ArgType); - TblTy = llvm::VectorType::get(TblTy->getElementType(), - 2*TblTy->getNumElements()); - llvm::Type *Tys[2] = { ResTy, TblTy }; + if (Modifier & Add2ArgTypes) + Tys.push_back(ArgType); - Function *TblF; - TblOps.push_back(IndexOp); - TblF = CGF.CGM.getIntrinsic(IntID, Tys); - - return CGF.EmitNeonCall(TblF, TblOps, Name); + if (Modifier & InventFloatType) + Tys.push_back(FloatTy); + + return CGM.getIntrinsic(IntrinsicID, Tys); } -static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, - unsigned BuiltinID, - const CallExpr *E) { - unsigned int Int = 0; - const char *s = NULL; +static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, + const NeonIntrinsicInfo &SISDInfo, + SmallVectorImpl<Value *> &Ops, + const CallExpr *E) { + unsigned BuiltinID = SISDInfo.BuiltinID; + unsigned int Int = SISDInfo.LLVMIntrinsic; + unsigned Modifier = SISDInfo.TypeModifier; + const char *s = SISDInfo.NameHint; - unsigned TblPos; switch (BuiltinID) { - default: - return 0; - case AArch64::BI__builtin_neon_vtbl1_v: - case AArch64::BI__builtin_neon_vqtbl1_v: - case AArch64::BI__builtin_neon_vqtbl1q_v: - case AArch64::BI__builtin_neon_vtbl2_v: - case AArch64::BI__builtin_neon_vqtbl2_v: - case AArch64::BI__builtin_neon_vqtbl2q_v: - case AArch64::BI__builtin_neon_vtbl3_v: - case AArch64::BI__builtin_neon_vqtbl3_v: - case AArch64::BI__builtin_neon_vqtbl3q_v: - case AArch64::BI__builtin_neon_vtbl4_v: - case AArch64::BI__builtin_neon_vqtbl4_v: - case AArch64::BI__builtin_neon_vqtbl4q_v: - TblPos = 0; - break; - case AArch64::BI__builtin_neon_vtbx1_v: - case AArch64::BI__builtin_neon_vqtbx1_v: - case AArch64::BI__builtin_neon_vqtbx1q_v: - case AArch64::BI__builtin_neon_vtbx2_v: - case AArch64::BI__builtin_neon_vqtbx2_v: - case AArch64::BI__builtin_neon_vqtbx2q_v: - case AArch64::BI__builtin_neon_vtbx3_v: - case AArch64::BI__builtin_neon_vqtbx3_v: - case AArch64::BI__builtin_neon_vqtbx3q_v: - case AArch64::BI__builtin_neon_vtbx4_v: - case AArch64::BI__builtin_neon_vqtbx4_v: - case AArch64::BI__builtin_neon_vqtbx4q_v: - TblPos = 1; + case NEON::BI__builtin_neon_vcled_s64: + case NEON::BI__builtin_neon_vcled_u64: + 
case NEON::BI__builtin_neon_vcles_f32: + case NEON::BI__builtin_neon_vcled_f64: + case NEON::BI__builtin_neon_vcltd_s64: + case NEON::BI__builtin_neon_vcltd_u64: + case NEON::BI__builtin_neon_vclts_f32: + case NEON::BI__builtin_neon_vcltd_f64: + case NEON::BI__builtin_neon_vcales_f32: + case NEON::BI__builtin_neon_vcaled_f64: + case NEON::BI__builtin_neon_vcalts_f32: + case NEON::BI__builtin_neon_vcaltd_f64: + // Only one direction of comparisons actually exist, cmle is actually a cmge + // with swapped operands. The table gives us the right intrinsic but we + // still need to do the swap. + std::swap(Ops[0], Ops[1]); break; } - assert(E->getNumArgs() >= 3); - - // Get the last argument, which specifies the vector type. - llvm::APSInt Result; - const Expr *Arg = E->getArg(E->getNumArgs() - 1); - if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) - return 0; - - // Determine the type of this overloaded NEON intrinsic. - NeonTypeFlags Type(Result.getZExtValue()); - llvm::VectorType *VTy = GetNeonType(&CGF, Type); - llvm::Type *Ty = VTy; - if (!Ty) - return 0; - - SmallVector<Value *, 4> Ops; - for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { - Ops.push_back(CGF.EmitScalarExpr(E->getArg(i))); - } - - Arg = E->getArg(TblPos); - llvm::Type *TblTy = CGF.ConvertType(Arg->getType()); - llvm::VectorType *VTblTy = cast<llvm::VectorType>(TblTy); - llvm::Type *Tys[2] = { Ty, VTblTy }; - unsigned nElts = VTy->getNumElements(); + assert(Int && "Generic code assumes a valid intrinsic"); - // AArch64 scalar builtins are not overloaded, they do not have an extra - // argument that specifies the vector type, need to handle each case. - SmallVector<Value *, 2> TblOps; - switch (BuiltinID) { - case AArch64::BI__builtin_neon_vtbl1_v: { - TblOps.push_back(Ops[0]); - return packTBLDVectorList(CGF, TblOps, 0, Ops[1], Ty, - Intrinsic::aarch64_neon_vtbl1, "vtbl1"); - } - case AArch64::BI__builtin_neon_vtbl2_v: { - TblOps.push_back(Ops[0]); - TblOps.push_back(Ops[1]); - return packTBLDVectorList(CGF, TblOps, 0, Ops[2], Ty, - Intrinsic::aarch64_neon_vtbl1, "vtbl1"); - } - case AArch64::BI__builtin_neon_vtbl3_v: { - TblOps.push_back(Ops[0]); - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - return packTBLDVectorList(CGF, TblOps, 0, Ops[3], Ty, - Intrinsic::aarch64_neon_vtbl2, "vtbl2"); - } - case AArch64::BI__builtin_neon_vtbl4_v: { - TblOps.push_back(Ops[0]); - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - TblOps.push_back(Ops[3]); - return packTBLDVectorList(CGF, TblOps, 0, Ops[4], Ty, - Intrinsic::aarch64_neon_vtbl2, "vtbl2"); - } - case AArch64::BI__builtin_neon_vtbx1_v: { - TblOps.push_back(Ops[1]); - Value *TblRes = packTBLDVectorList(CGF, TblOps, 0, Ops[2], Ty, - Intrinsic::aarch64_neon_vtbl1, "vtbl1"); - - llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8); - Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight); - Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); - CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty); + // Determine the type(s) of this overloaded AArch64 intrinsic. 
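The operand swap a few lines up is a plain identity over the ACLE intrinsics; a hedged example, not code from this change:

  // Only the "greater" comparisons exist as instructions, so a <= compare is
  // emitted as the >= compare with the operands reversed:
  //   uint64_t r1 = vcled_s64(a, b);   // a <= b
  //   uint64_t r2 = vcged_s64(b, a);   // b >= a, same result bit pattern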
+ const Expr *Arg = E->getArg(0); + llvm::Type *ArgTy = CGF.ConvertType(Arg->getType()); + Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E); - SmallVector<Value *, 4> BslOps; - BslOps.push_back(CmpRes); - BslOps.push_back(Ops[0]); - BslOps.push_back(TblRes); - Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty); - return CGF.EmitNeonCall(BslF, BslOps, "vbsl"); - } - case AArch64::BI__builtin_neon_vtbx2_v: { - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty, - Intrinsic::aarch64_neon_vtbx1, "vtbx1"); - } - case AArch64::BI__builtin_neon_vtbx3_v: { - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - TblOps.push_back(Ops[3]); - Value *TblRes = packTBLDVectorList(CGF, TblOps, 0, Ops[4], Ty, - Intrinsic::aarch64_neon_vtbl2, "vtbl2"); + int j = 0; + ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0); + for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); + ai != ae; ++ai, ++j) { + llvm::Type *ArgTy = ai->getType(); + if (Ops[j]->getType()->getPrimitiveSizeInBits() == + ArgTy->getPrimitiveSizeInBits()) + continue; - llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24); - Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour); - Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], - TwentyFourV); - CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty); - - SmallVector<Value *, 4> BslOps; - BslOps.push_back(CmpRes); - BslOps.push_back(Ops[0]); - BslOps.push_back(TblRes); - Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty); - return CGF.EmitNeonCall(BslF, BslOps, "vbsl"); - } - case AArch64::BI__builtin_neon_vtbx4_v: { - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - TblOps.push_back(Ops[3]); - TblOps.push_back(Ops[4]); - return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty, - Intrinsic::aarch64_neon_vtbx2, "vtbx2"); - } - case AArch64::BI__builtin_neon_vqtbl1_v: - case AArch64::BI__builtin_neon_vqtbl1q_v: - Int = Intrinsic::aarch64_neon_vtbl1; s = "vtbl1"; break; - case AArch64::BI__builtin_neon_vqtbl2_v: - case AArch64::BI__builtin_neon_vqtbl2q_v: { - Int = Intrinsic::aarch64_neon_vtbl2; s = "vtbl2"; break; - case AArch64::BI__builtin_neon_vqtbl3_v: - case AArch64::BI__builtin_neon_vqtbl3q_v: - Int = Intrinsic::aarch64_neon_vtbl3; s = "vtbl3"; break; - case AArch64::BI__builtin_neon_vqtbl4_v: - case AArch64::BI__builtin_neon_vqtbl4q_v: - Int = Intrinsic::aarch64_neon_vtbl4; s = "vtbl4"; break; - case AArch64::BI__builtin_neon_vqtbx1_v: - case AArch64::BI__builtin_neon_vqtbx1q_v: - Int = Intrinsic::aarch64_neon_vtbx1; s = "vtbx1"; break; - case AArch64::BI__builtin_neon_vqtbx2_v: - case AArch64::BI__builtin_neon_vqtbx2q_v: - Int = Intrinsic::aarch64_neon_vtbx2; s = "vtbx2"; break; - case AArch64::BI__builtin_neon_vqtbx3_v: - case AArch64::BI__builtin_neon_vqtbx3q_v: - Int = Intrinsic::aarch64_neon_vtbx3; s = "vtbx3"; break; - case AArch64::BI__builtin_neon_vqtbx4_v: - case AArch64::BI__builtin_neon_vqtbx4q_v: - Int = Intrinsic::aarch64_neon_vtbx4; s = "vtbx4"; break; - } + assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy()); + // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate + // it before inserting. 
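To make the scalar-to-vector wrapping in this loop concrete, a rough sketch of the IR it produces for a builtin tagged Vectorize1ArgType | Use64BitVectors, for example vqaddh_s16 on i16 operands; the exact mangled intrinsic name is an assumption here:

  // i16 operands are placed in lane 0 of a 64-bit (<4 x i16>) vector, the vector
  // form of the intrinsic is called, and lane 0 is extracted again afterwards:
  //   %lhs = insertelement <4 x i16> undef, i16 %a, i64 0
  //   %rhs = insertelement <4 x i16> undef, i16 %b, i64 0
  //   %r   = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  //   %res = extractelement <4 x i16> %r, i64 0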
+ Ops[j] = + CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType()); + Ops[j] = + CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0); } - if (!Int) - return 0; + Value *Result = CGF.EmitNeonCall(F, Ops, s); + llvm::Type *ResultType = CGF.ConvertType(E->getType()); + if (ResultType->getPrimitiveSizeInBits() < + Result->getType()->getPrimitiveSizeInBits()) + return CGF.Builder.CreateExtractElement(Result, C0); - Function *F = CGF.CGM.getIntrinsic(Int, Tys); - return CGF.EmitNeonCall(F, Ops, s); + return CGF.Builder.CreateBitCast(Result, ResultType, s); } -Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { - // Process AArch64 scalar builtins - if (Value *Result = EmitAArch64ScalarBuiltinExpr(*this, BuiltinID, E)) - return Result; - - // Process AArch64 table lookup builtins - if (Value *Result = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E)) - return Result; - - if (BuiltinID == AArch64::BI__clear_cache) { - assert(E->getNumArgs() == 2 && - "Variadic __clear_cache slipped through on AArch64"); - - const FunctionDecl *FD = E->getDirectCallee(); - SmallVector<Value *, 2> Ops; - for (unsigned i = 0; i < E->getNumArgs(); i++) - Ops.push_back(EmitScalarExpr(E->getArg(i))); - llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); - llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); - StringRef Name = FD->getName(); - return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); - } - - SmallVector<Value *, 4> Ops; - llvm::Value *Align = 0; // Alignment for load/store - for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { - if (i == 0) { - switch (BuiltinID) { - case AArch64::BI__builtin_neon_vst1_x2_v: - case AArch64::BI__builtin_neon_vst1q_x2_v: - case AArch64::BI__builtin_neon_vst1_x3_v: - case AArch64::BI__builtin_neon_vst1q_x3_v: - case AArch64::BI__builtin_neon_vst1_x4_v: - case AArch64::BI__builtin_neon_vst1q_x4_v: - // Handle ld1/st1 lane in this function a little different from ARM. - case AArch64::BI__builtin_neon_vld1_lane_v: - case AArch64::BI__builtin_neon_vld1q_lane_v: - case AArch64::BI__builtin_neon_vst1_lane_v: - case AArch64::BI__builtin_neon_vst1q_lane_v: - // Get the alignment for the argument in addition to the value; - // we'll use it later. - std::pair<llvm::Value *, unsigned> Src = - EmitPointerWithAlignment(E->getArg(0)); - Ops.push_back(Src.first); - Align = Builder.getInt32(Src.second); - continue; - } - } - if (i == 1) { - switch (BuiltinID) { - case AArch64::BI__builtin_neon_vld1_x2_v: - case AArch64::BI__builtin_neon_vld1q_x2_v: - case AArch64::BI__builtin_neon_vld1_x3_v: - case AArch64::BI__builtin_neon_vld1q_x3_v: - case AArch64::BI__builtin_neon_vld1_x4_v: - case AArch64::BI__builtin_neon_vld1q_x4_v: - // Handle ld1/st1 dup lane in this function a little different from ARM. - case AArch64::BI__builtin_neon_vld2_dup_v: - case AArch64::BI__builtin_neon_vld2q_dup_v: - case AArch64::BI__builtin_neon_vld3_dup_v: - case AArch64::BI__builtin_neon_vld3q_dup_v: - case AArch64::BI__builtin_neon_vld4_dup_v: - case AArch64::BI__builtin_neon_vld4q_dup_v: - case AArch64::BI__builtin_neon_vld2_lane_v: - case AArch64::BI__builtin_neon_vld2q_lane_v: - // Get the alignment for the argument in addition to the value; - // we'll use it later. 
- std::pair<llvm::Value *, unsigned> Src = - EmitPointerWithAlignment(E->getArg(1)); - Ops.push_back(Src.first); - Align = Builder.getInt32(Src.second); - continue; - } - } - Ops.push_back(EmitScalarExpr(E->getArg(i))); - } - +Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( + unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, + const char *NameHint, unsigned Modifier, const CallExpr *E, + SmallVectorImpl<llvm::Value *> &Ops, llvm::Value *Align) { // Get the last argument, which specifies the vector type. - llvm::APSInt Result; + llvm::APSInt NeonTypeConst; const Expr *Arg = E->getArg(E->getNumArgs() - 1); - if (!Arg->isIntegerConstantExpr(Result, getContext())) - return 0; + if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext())) + return nullptr; // Determine the type of this overloaded NEON intrinsic. - NeonTypeFlags Type(Result.getZExtValue()); - bool usgn = Type.isUnsigned(); - bool quad = Type.isQuad(); + NeonTypeFlags Type(NeonTypeConst.getZExtValue()); + bool Usgn = Type.isUnsigned(); + bool Quad = Type.isQuad(); llvm::VectorType *VTy = GetNeonType(this, Type); llvm::Type *Ty = VTy; if (!Ty) - return 0; + return nullptr; + + unsigned Int = LLVMIntrinsic; + if ((Modifier & UnsignedAlts) && !Usgn) + Int = AltLLVMIntrinsic; - unsigned Int; switch (BuiltinID) { - default: - return 0; - - // AArch64 builtins mapping to legacy ARM v7 builtins. - // FIXME: the mapped builtins listed correspond to what has been tested - // in aarch64-neon-intrinsics.c so far. - case AArch64::BI__builtin_neon_vuzp_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vuzp_v, E); - case AArch64::BI__builtin_neon_vuzpq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vuzpq_v, E); - case AArch64::BI__builtin_neon_vzip_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vzip_v, E); - case AArch64::BI__builtin_neon_vzipq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vzipq_v, E); - case AArch64::BI__builtin_neon_vtrn_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtrn_v, E); - case AArch64::BI__builtin_neon_vtrnq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtrnq_v, E); - case AArch64::BI__builtin_neon_vext_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vext_v, E); - case AArch64::BI__builtin_neon_vextq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vextq_v, E); - case AArch64::BI__builtin_neon_vmul_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmul_v, E); - case AArch64::BI__builtin_neon_vmulq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmulq_v, E); - case AArch64::BI__builtin_neon_vabd_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabd_v, E); - case AArch64::BI__builtin_neon_vabdq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabdq_v, E); - case AArch64::BI__builtin_neon_vfma_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vfma_v, E); - case AArch64::BI__builtin_neon_vfmaq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vfmaq_v, E); - case AArch64::BI__builtin_neon_vbsl_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vbsl_v, E); - case AArch64::BI__builtin_neon_vbslq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vbslq_v, E); - case AArch64::BI__builtin_neon_vrsqrts_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrts_v, E); - case AArch64::BI__builtin_neon_vrsqrtsq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrtsq_v, E); - case AArch64::BI__builtin_neon_vrecps_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecps_v, E); - case AArch64::BI__builtin_neon_vrecpsq_v: - 
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecpsq_v, E); - case AArch64::BI__builtin_neon_vcale_v: - if (VTy->getVectorNumElements() == 1) { - std::swap(Ops[0], Ops[1]); - } else { - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcale_v, E); - } - case AArch64::BI__builtin_neon_vcage_v: - if (VTy->getVectorNumElements() == 1) { - // Determine the types of this overloaded AArch64 intrinsic - SmallVector<llvm::Type *, 3> Tys; - Tys.push_back(VTy); - VTy = llvm::VectorType::get(DoubleTy, 1); - Tys.push_back(VTy); - Tys.push_back(VTy); - Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vcage, Tys); - return EmitNeonCall(F, Ops, "vcage"); - } - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcage_v, E); - case AArch64::BI__builtin_neon_vcaleq_v: - std::swap(Ops[0], Ops[1]); - case AArch64::BI__builtin_neon_vcageq_v: { - Function *F; - if (VTy->getElementType()->isIntegerTy(64)) - F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vacgeq); - else - F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq); - return EmitNeonCall(F, Ops, "vcage"); + default: break; + case NEON::BI__builtin_neon_vabs_v: + case NEON::BI__builtin_neon_vabsq_v: + if (VTy->getElementType()->isFloatingPointTy()) + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); + return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs"); + case NEON::BI__builtin_neon_vaddhn_v: { + llvm::VectorType *SrcTy = + llvm::VectorType::getExtendedElementVectorType(VTy); + + // %sum = add <4 x i32> %lhs, %rhs + Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); + Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); + Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn"); + + // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> + Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(), + SrcTy->getScalarSizeInBits() / 2); + ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt); + Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn"); + + // %res = trunc <4 x i32> %high to <4 x i16> + return Builder.CreateTrunc(Ops[0], VTy, "vaddhn"); } - case AArch64::BI__builtin_neon_vcalt_v: - if (VTy->getVectorNumElements() == 1) { - std::swap(Ops[0], Ops[1]); - } else { - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcalt_v, E); - } - case AArch64::BI__builtin_neon_vcagt_v: - if (VTy->getVectorNumElements() == 1) { - // Determine the types of this overloaded AArch64 intrinsic - SmallVector<llvm::Type *, 3> Tys; - Tys.push_back(VTy); - VTy = llvm::VectorType::get(DoubleTy, 1); - Tys.push_back(VTy); - Tys.push_back(VTy); - Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vcagt, Tys); - return EmitNeonCall(F, Ops, "vcagt"); - } - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcagt_v, E); - case AArch64::BI__builtin_neon_vcaltq_v: + case NEON::BI__builtin_neon_vcale_v: + case NEON::BI__builtin_neon_vcaleq_v: + case NEON::BI__builtin_neon_vcalt_v: + case NEON::BI__builtin_neon_vcaltq_v: std::swap(Ops[0], Ops[1]); - case AArch64::BI__builtin_neon_vcagtq_v: { - Function *F; - if (VTy->getElementType()->isIntegerTy(64)) - F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vacgtq); - else - F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq); - return EmitNeonCall(F, Ops, "vcagt"); - } - case AArch64::BI__builtin_neon_vtst_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtst_v, E); - case AArch64::BI__builtin_neon_vtstq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtstq_v, E); - case AArch64::BI__builtin_neon_vhadd_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhadd_v, E); - case 
AArch64::BI__builtin_neon_vhaddq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhaddq_v, E); - case AArch64::BI__builtin_neon_vhsub_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhsub_v, E); - case AArch64::BI__builtin_neon_vhsubq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhsubq_v, E); - case AArch64::BI__builtin_neon_vrhadd_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrhadd_v, E); - case AArch64::BI__builtin_neon_vrhaddq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrhaddq_v, E); - case AArch64::BI__builtin_neon_vqadd_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqadd_v, E); - case AArch64::BI__builtin_neon_vqaddq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqaddq_v, E); - case AArch64::BI__builtin_neon_vqsub_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqsub_v, E); - case AArch64::BI__builtin_neon_vqsubq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqsubq_v, E); - case AArch64::BI__builtin_neon_vshl_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshl_v, E); - case AArch64::BI__builtin_neon_vshlq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshlq_v, E); - case AArch64::BI__builtin_neon_vqshl_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshl_v, E); - case AArch64::BI__builtin_neon_vqshlq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshlq_v, E); - case AArch64::BI__builtin_neon_vrshl_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrshl_v, E); - case AArch64::BI__builtin_neon_vrshlq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrshlq_v, E); - case AArch64::BI__builtin_neon_vqrshl_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshl_v, E); - case AArch64::BI__builtin_neon_vqrshlq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshlq_v, E); - case AArch64::BI__builtin_neon_vaddhn_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vaddhn_v, E); - case AArch64::BI__builtin_neon_vraddhn_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vraddhn_v, E); - case AArch64::BI__builtin_neon_vsubhn_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsubhn_v, E); - case AArch64::BI__builtin_neon_vrsubhn_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsubhn_v, E); - case AArch64::BI__builtin_neon_vmull_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmull_v, E); - case AArch64::BI__builtin_neon_vqdmull_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmull_v, E); - case AArch64::BI__builtin_neon_vqdmlal_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmlal_v, E); - case AArch64::BI__builtin_neon_vqdmlsl_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmlsl_v, E); - case AArch64::BI__builtin_neon_vmax_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmax_v, E); - case AArch64::BI__builtin_neon_vmaxq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmaxq_v, E); - case AArch64::BI__builtin_neon_vmin_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmin_v, E); - case AArch64::BI__builtin_neon_vminq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vminq_v, E); - case AArch64::BI__builtin_neon_vpmax_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpmax_v, E); - case AArch64::BI__builtin_neon_vpmin_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpmin_v, E); - case AArch64::BI__builtin_neon_vpadd_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpadd_v, E); - case AArch64::BI__builtin_neon_vqdmulh_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmulh_v, E); - case AArch64::BI__builtin_neon_vqdmulhq_v: - 
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmulhq_v, E); - case AArch64::BI__builtin_neon_vqrdmulh_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulh_v, E); - case AArch64::BI__builtin_neon_vqrdmulhq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulhq_v, E); - - // Shift by immediate - case AArch64::BI__builtin_neon_vshr_n_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshr_n_v, E); - case AArch64::BI__builtin_neon_vshrq_n_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshrq_n_v, E); - case AArch64::BI__builtin_neon_vrshr_n_v: - case AArch64::BI__builtin_neon_vrshrq_n_v: - Int = usgn ? Intrinsic::aarch64_neon_vurshr - : Intrinsic::aarch64_neon_vsrshr; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n"); - case AArch64::BI__builtin_neon_vsra_n_v: - if (VTy->getElementType()->isIntegerTy(64)) { - Int = usgn ? Intrinsic::aarch64_neon_vsradu_n - : Intrinsic::aarch64_neon_vsrads_n; - return EmitNeonCall(CGM.getIntrinsic(Int), Ops, "vsra_n"); - } - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsra_n_v, E); - case AArch64::BI__builtin_neon_vsraq_n_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsraq_n_v, E); - case AArch64::BI__builtin_neon_vrsra_n_v: - if (VTy->getElementType()->isIntegerTy(64)) { - Int = usgn ? Intrinsic::aarch64_neon_vrsradu_n - : Intrinsic::aarch64_neon_vrsrads_n; - return EmitNeonCall(CGM.getIntrinsic(Int), Ops, "vrsra_n"); - } - // fall through - case AArch64::BI__builtin_neon_vrsraq_n_v: { + case NEON::BI__builtin_neon_vcage_v: + case NEON::BI__builtin_neon_vcageq_v: + case NEON::BI__builtin_neon_vcagt_v: + case NEON::BI__builtin_neon_vcagtq_v: { + llvm::Type *VecFlt = llvm::VectorType::get( + VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy, + VTy->getNumElements()); + llvm::Type *Tys[] = { VTy, VecFlt }; + Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); + return EmitNeonCall(F, Ops, NameHint); + } + case NEON::BI__builtin_neon_vclz_v: + case NEON::BI__builtin_neon_vclzq_v: + // We generate target-independent intrinsic, which needs a second argument + // for whether or not clz of zero is undefined; on ARM it isn't. + Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef())); + break; + case NEON::BI__builtin_neon_vcvt_f32_v: + case NEON::BI__builtin_neon_vcvtq_f32_v: Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Int = usgn ? 
Intrinsic::aarch64_neon_vurshr - : Intrinsic::aarch64_neon_vsrshr; - Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]); - return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); - } - case AArch64::BI__builtin_neon_vshl_n_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshl_n_v, E); - case AArch64::BI__builtin_neon_vshlq_n_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshlq_n_v, E); - case AArch64::BI__builtin_neon_vqshl_n_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshl_n_v, E); - case AArch64::BI__builtin_neon_vqshlq_n_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshlq_n_v, E); - case AArch64::BI__builtin_neon_vqshlu_n_v: - case AArch64::BI__builtin_neon_vqshluq_n_v: - Int = Intrinsic::aarch64_neon_vsqshlu; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n"); - case AArch64::BI__builtin_neon_vsri_n_v: - case AArch64::BI__builtin_neon_vsriq_n_v: - Int = Intrinsic::aarch64_neon_vsri; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsri_n"); - case AArch64::BI__builtin_neon_vsli_n_v: - case AArch64::BI__builtin_neon_vsliq_n_v: - Int = Intrinsic::aarch64_neon_vsli; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsli_n"); - case AArch64::BI__builtin_neon_vshll_n_v: { - llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy); - Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); - if (usgn) - Ops[0] = Builder.CreateZExt(Ops[0], VTy); - else - Ops[0] = Builder.CreateSExt(Ops[0], VTy); - Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false); - return Builder.CreateShl(Ops[0], Ops[1], "vshll_n"); - } - case AArch64::BI__builtin_neon_vshrn_n_v: { - llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy); - Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); - Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false); - if (usgn) - Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]); - else - Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]); - return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n"); - } - case AArch64::BI__builtin_neon_vqshrun_n_v: - Int = Intrinsic::aarch64_neon_vsqshrun; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); - case AArch64::BI__builtin_neon_vrshrn_n_v: - Int = Intrinsic::aarch64_neon_vrshrn; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); - case AArch64::BI__builtin_neon_vqrshrun_n_v: - Int = Intrinsic::aarch64_neon_vsqrshrun; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); - case AArch64::BI__builtin_neon_vqshrn_n_v: - Int = usgn ? Intrinsic::aarch64_neon_vuqshrn - : Intrinsic::aarch64_neon_vsqshrn; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); - case AArch64::BI__builtin_neon_vqrshrn_n_v: - Int = usgn ? Intrinsic::aarch64_neon_vuqrshrn - : Intrinsic::aarch64_neon_vsqrshrn; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); - - // Convert - case AArch64::BI__builtin_neon_vmovl_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmovl_v, E); - case AArch64::BI__builtin_neon_vcvt_n_f32_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_f32_v, E); - case AArch64::BI__builtin_neon_vcvtq_n_f32_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_f32_v, E); - case AArch64::BI__builtin_neon_vcvt_n_f64_v: - case AArch64::BI__builtin_neon_vcvtq_n_f64_v: { + Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad)); + return Usgn ? 
Builder.CreateUIToFP(Ops[0], Ty, "vcvt") + : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); + case NEON::BI__builtin_neon_vcvt_n_f32_v: + case NEON::BI__builtin_neon_vcvt_n_f64_v: + case NEON::BI__builtin_neon_vcvtq_n_f32_v: + case NEON::BI__builtin_neon_vcvtq_n_f64_v: { + bool Double = + (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); llvm::Type *FloatTy = - GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); + GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64 + : NeonTypeFlags::Float32, + false, Quad)); llvm::Type *Tys[2] = { FloatTy, Ty }; - Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp - : Intrinsic::arm_neon_vcvtfxs2fp; + Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic; Function *F = CGM.getIntrinsic(Int, Tys); return EmitNeonCall(F, Ops, "vcvt_n"); } - case AArch64::BI__builtin_neon_vcvt_n_s32_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_s32_v, E); - case AArch64::BI__builtin_neon_vcvtq_n_s32_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_s32_v, E); - case AArch64::BI__builtin_neon_vcvt_n_u32_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_u32_v, E); - case AArch64::BI__builtin_neon_vcvtq_n_u32_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_u32_v, E); - case AArch64::BI__builtin_neon_vcvt_n_s64_v: - case AArch64::BI__builtin_neon_vcvt_n_u64_v: - case AArch64::BI__builtin_neon_vcvtq_n_s64_v: - case AArch64::BI__builtin_neon_vcvtq_n_u64_v: { + case NEON::BI__builtin_neon_vcvt_n_s32_v: + case NEON::BI__builtin_neon_vcvt_n_u32_v: + case NEON::BI__builtin_neon_vcvt_n_s64_v: + case NEON::BI__builtin_neon_vcvt_n_u64_v: + case NEON::BI__builtin_neon_vcvtq_n_s32_v: + case NEON::BI__builtin_neon_vcvtq_n_u32_v: + case NEON::BI__builtin_neon_vcvtq_n_s64_v: + case NEON::BI__builtin_neon_vcvtq_n_u64_v: { + bool Double = + (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); llvm::Type *FloatTy = - GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); + GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64 + : NeonTypeFlags::Float32, + false, Quad)); llvm::Type *Tys[2] = { Ty, FloatTy }; - Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu - : Intrinsic::arm_neon_vcvtfp2fxs; - Function *F = CGM.getIntrinsic(Int, Tys); + Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); return EmitNeonCall(F, Ops, "vcvt_n"); } + case NEON::BI__builtin_neon_vcvt_s32_v: + case NEON::BI__builtin_neon_vcvt_u32_v: + case NEON::BI__builtin_neon_vcvt_s64_v: + case NEON::BI__builtin_neon_vcvt_u64_v: + case NEON::BI__builtin_neon_vcvtq_s32_v: + case NEON::BI__builtin_neon_vcvtq_u32_v: + case NEON::BI__builtin_neon_vcvtq_s64_v: + case NEON::BI__builtin_neon_vcvtq_u64_v: { + bool Double = + (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); + llvm::Type *FloatTy = + GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64 + : NeonTypeFlags::Float32, + false, Quad)); + Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); + return Usgn ? 
Builder.CreateFPToUI(Ops[0], Ty, "vcvt") + : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); + } + case NEON::BI__builtin_neon_vcvta_s32_v: + case NEON::BI__builtin_neon_vcvta_s64_v: + case NEON::BI__builtin_neon_vcvta_u32_v: + case NEON::BI__builtin_neon_vcvta_u64_v: + case NEON::BI__builtin_neon_vcvtaq_s32_v: + case NEON::BI__builtin_neon_vcvtaq_s64_v: + case NEON::BI__builtin_neon_vcvtaq_u32_v: + case NEON::BI__builtin_neon_vcvtaq_u64_v: + case NEON::BI__builtin_neon_vcvtn_s32_v: + case NEON::BI__builtin_neon_vcvtn_s64_v: + case NEON::BI__builtin_neon_vcvtn_u32_v: + case NEON::BI__builtin_neon_vcvtn_u64_v: + case NEON::BI__builtin_neon_vcvtnq_s32_v: + case NEON::BI__builtin_neon_vcvtnq_s64_v: + case NEON::BI__builtin_neon_vcvtnq_u32_v: + case NEON::BI__builtin_neon_vcvtnq_u64_v: + case NEON::BI__builtin_neon_vcvtp_s32_v: + case NEON::BI__builtin_neon_vcvtp_s64_v: + case NEON::BI__builtin_neon_vcvtp_u32_v: + case NEON::BI__builtin_neon_vcvtp_u64_v: + case NEON::BI__builtin_neon_vcvtpq_s32_v: + case NEON::BI__builtin_neon_vcvtpq_s64_v: + case NEON::BI__builtin_neon_vcvtpq_u32_v: + case NEON::BI__builtin_neon_vcvtpq_u64_v: + case NEON::BI__builtin_neon_vcvtm_s32_v: + case NEON::BI__builtin_neon_vcvtm_s64_v: + case NEON::BI__builtin_neon_vcvtm_u32_v: + case NEON::BI__builtin_neon_vcvtm_u64_v: + case NEON::BI__builtin_neon_vcvtmq_s32_v: + case NEON::BI__builtin_neon_vcvtmq_s64_v: + case NEON::BI__builtin_neon_vcvtmq_u32_v: + case NEON::BI__builtin_neon_vcvtmq_u64_v: { + bool Double = + (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); + llvm::Type *InTy = + GetNeonType(this, + NeonTypeFlags(Double ? NeonTypeFlags::Float64 + : NeonTypeFlags::Float32, false, Quad)); + llvm::Type *Tys[2] = { Ty, InTy }; + return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint); + } + case NEON::BI__builtin_neon_vext_v: + case NEON::BI__builtin_neon_vextq_v: { + int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); + SmallVector<Constant*, 16> Indices; + for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) + Indices.push_back(ConstantInt::get(Int32Ty, i+CV)); - // Load/Store - case AArch64::BI__builtin_neon_vld1_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1_v, E); - case AArch64::BI__builtin_neon_vld1q_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1q_v, E); - case AArch64::BI__builtin_neon_vld2_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2_v, E); - case AArch64::BI__builtin_neon_vld2q_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2q_v, E); - case AArch64::BI__builtin_neon_vld3_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3_v, E); - case AArch64::BI__builtin_neon_vld3q_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3q_v, E); - case AArch64::BI__builtin_neon_vld4_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4_v, E); - case AArch64::BI__builtin_neon_vld4q_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4q_v, E); - case AArch64::BI__builtin_neon_vst1_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst1_v, E); - case AArch64::BI__builtin_neon_vst1q_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst1q_v, E); - case AArch64::BI__builtin_neon_vst2_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2_v, E); - case AArch64::BI__builtin_neon_vst2q_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2q_v, E); - case AArch64::BI__builtin_neon_vst3_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3_v, E); - case AArch64::BI__builtin_neon_vst3q_v: - return 
EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3q_v, E); - case AArch64::BI__builtin_neon_vst4_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4_v, E); - case AArch64::BI__builtin_neon_vst4q_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_v, E); - case AArch64::BI__builtin_neon_vld1_x2_v: - case AArch64::BI__builtin_neon_vld1q_x2_v: - case AArch64::BI__builtin_neon_vld1_x3_v: - case AArch64::BI__builtin_neon_vld1q_x3_v: - case AArch64::BI__builtin_neon_vld1_x4_v: - case AArch64::BI__builtin_neon_vld1q_x4_v: { - unsigned Int; - switch (BuiltinID) { - case AArch64::BI__builtin_neon_vld1_x2_v: - case AArch64::BI__builtin_neon_vld1q_x2_v: - Int = Intrinsic::aarch64_neon_vld1x2; - break; - case AArch64::BI__builtin_neon_vld1_x3_v: - case AArch64::BI__builtin_neon_vld1q_x3_v: - Int = Intrinsic::aarch64_neon_vld1x3; - break; - case AArch64::BI__builtin_neon_vld1_x4_v: - case AArch64::BI__builtin_neon_vld1q_x4_v: - Int = Intrinsic::aarch64_neon_vld1x4; - break; - } - Function *F = CGM.getIntrinsic(Int, Ty); - Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld1xN"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); + Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + Value *SV = llvm::ConstantVector::get(Indices); + return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext"); + } + case NEON::BI__builtin_neon_vfma_v: + case NEON::BI__builtin_neon_vfmaq_v: { + Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + Ops[2] = Builder.CreateBitCast(Ops[2], Ty); + + // NEON intrinsic puts accumulator first, unlike the LLVM fma. + return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); } - case AArch64::BI__builtin_neon_vst1_x2_v: - case AArch64::BI__builtin_neon_vst1q_x2_v: - case AArch64::BI__builtin_neon_vst1_x3_v: - case AArch64::BI__builtin_neon_vst1q_x3_v: - case AArch64::BI__builtin_neon_vst1_x4_v: - case AArch64::BI__builtin_neon_vst1q_x4_v: { + case NEON::BI__builtin_neon_vld1_v: + case NEON::BI__builtin_neon_vld1q_v: Ops.push_back(Align); - unsigned Int; - switch (BuiltinID) { - case AArch64::BI__builtin_neon_vst1_x2_v: - case AArch64::BI__builtin_neon_vst1q_x2_v: - Int = Intrinsic::aarch64_neon_vst1x2; - break; - case AArch64::BI__builtin_neon_vst1_x3_v: - case AArch64::BI__builtin_neon_vst1q_x3_v: - Int = Intrinsic::aarch64_neon_vst1x3; - break; - case AArch64::BI__builtin_neon_vst1_x4_v: - case AArch64::BI__builtin_neon_vst1q_x4_v: - Int = Intrinsic::aarch64_neon_vst1x4; - break; - } - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, ""); + return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vld1"); + case NEON::BI__builtin_neon_vld2_v: + case NEON::BI__builtin_neon_vld2q_v: + case NEON::BI__builtin_neon_vld3_v: + case NEON::BI__builtin_neon_vld3q_v: + case NEON::BI__builtin_neon_vld4_v: + case NEON::BI__builtin_neon_vld4q_v: { + Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty); + Ops[1] = Builder.CreateCall2(F, Ops[1], Align, NameHint); + Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + return Builder.CreateStore(Ops[1], Ops[0]); } - case AArch64::BI__builtin_neon_vld1_lane_v: - case AArch64::BI__builtin_neon_vld1q_lane_v: { - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + case NEON::BI__builtin_neon_vld1_dup_v: + case NEON::BI__builtin_neon_vld1q_dup_v: { + Value *V = UndefValue::get(Ty); Ty = 
llvm::PointerType::getUnqual(VTy->getElementType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); LoadInst *Ld = Builder.CreateLoad(Ops[0]); Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); - return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); - } - case AArch64::BI__builtin_neon_vld2_lane_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2q_lane_v, E); - case AArch64::BI__builtin_neon_vld2q_lane_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2q_lane_v, E); - case AArch64::BI__builtin_neon_vld3_lane_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3_lane_v, E); - case AArch64::BI__builtin_neon_vld3q_lane_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3q_lane_v, E); - case AArch64::BI__builtin_neon_vld4_lane_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4_lane_v, E); - case AArch64::BI__builtin_neon_vld4q_lane_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4q_lane_v, E); - case AArch64::BI__builtin_neon_vst1_lane_v: - case AArch64::BI__builtin_neon_vst1q_lane_v: { - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - StoreInst *St = - Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty)); - St->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); - return St; + llvm::Constant *CI = ConstantInt::get(SizeTy, 0); + Ops[0] = Builder.CreateInsertElement(V, Ld, CI); + return EmitNeonSplat(Ops[0], CI); } - case AArch64::BI__builtin_neon_vst2_lane_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2_lane_v, E); - case AArch64::BI__builtin_neon_vst2q_lane_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2q_lane_v, E); - case AArch64::BI__builtin_neon_vst3_lane_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3_lane_v, E); - case AArch64::BI__builtin_neon_vst3q_lane_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3q_lane_v, E); - case AArch64::BI__builtin_neon_vst4_lane_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4_lane_v, E); - case AArch64::BI__builtin_neon_vst4q_lane_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_lane_v, E); - case AArch64::BI__builtin_neon_vld1_dup_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1_dup_v, E); - case AArch64::BI__builtin_neon_vld1q_dup_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1q_dup_v, E); - case AArch64::BI__builtin_neon_vld2_dup_v: - case AArch64::BI__builtin_neon_vld2q_dup_v: - case AArch64::BI__builtin_neon_vld3_dup_v: - case AArch64::BI__builtin_neon_vld3q_dup_v: - case AArch64::BI__builtin_neon_vld4_dup_v: - case AArch64::BI__builtin_neon_vld4q_dup_v: { - // Handle 64-bit x 1 elements as a special-case. There is no "dup" needed. 
- if (VTy->getElementType()->getPrimitiveSizeInBits() == 64 && - VTy->getNumElements() == 1) { - switch (BuiltinID) { - case AArch64::BI__builtin_neon_vld2_dup_v: - Int = Intrinsic::arm_neon_vld2; - break; - case AArch64::BI__builtin_neon_vld3_dup_v: - Int = Intrinsic::arm_neon_vld3; - break; - case AArch64::BI__builtin_neon_vld4_dup_v: - Int = Intrinsic::arm_neon_vld4; - break; - default: - llvm_unreachable("unknown vld_dup intrinsic?"); - } - Function *F = CGM.getIntrinsic(Int, Ty); - Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); - } - switch (BuiltinID) { - case AArch64::BI__builtin_neon_vld2_dup_v: - case AArch64::BI__builtin_neon_vld2q_dup_v: - Int = Intrinsic::arm_neon_vld2lane; - break; - case AArch64::BI__builtin_neon_vld3_dup_v: - case AArch64::BI__builtin_neon_vld3q_dup_v: - Int = Intrinsic::arm_neon_vld3lane; - break; - case AArch64::BI__builtin_neon_vld4_dup_v: - case AArch64::BI__builtin_neon_vld4q_dup_v: - Int = Intrinsic::arm_neon_vld4lane; - break; - } - Function *F = CGM.getIntrinsic(Int, Ty); - llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); - - SmallVector<Value *, 6> Args; - Args.push_back(Ops[1]); - Args.append(STy->getNumElements(), UndefValue::get(Ty)); - - llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); - Args.push_back(CI); - Args.push_back(Align); - - Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); - // splat lane 0 to all elts in each vector of the result. - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Value *Val = Builder.CreateExtractValue(Ops[1], i); - Value *Elt = Builder.CreateBitCast(Val, Ty); - Elt = EmitNeonSplat(Elt, CI); - Elt = Builder.CreateBitCast(Elt, Val->getType()); - Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); - } + case NEON::BI__builtin_neon_vld2_lane_v: + case NEON::BI__builtin_neon_vld2q_lane_v: + case NEON::BI__builtin_neon_vld3_lane_v: + case NEON::BI__builtin_neon_vld3q_lane_v: + case NEON::BI__builtin_neon_vld4_lane_v: + case NEON::BI__builtin_neon_vld4q_lane_v: { + Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty); + for (unsigned I = 2; I < Ops.size() - 1; ++I) + Ops[I] = Builder.CreateBitCast(Ops[I], Ty); + Ops.push_back(Align); + Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return Builder.CreateStore(Ops[1], Ops[0]); } + case NEON::BI__builtin_neon_vmovl_v: { + llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy); + Ops[0] = Builder.CreateBitCast(Ops[0], DTy); + if (Usgn) + return Builder.CreateZExt(Ops[0], Ty, "vmovl"); + return Builder.CreateSExt(Ops[0], Ty, "vmovl"); + } + case NEON::BI__builtin_neon_vmovn_v: { + llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy); + Ops[0] = Builder.CreateBitCast(Ops[0], QTy); + return Builder.CreateTrunc(Ops[0], Ty, "vmovn"); + } + case NEON::BI__builtin_neon_vmull_v: + // FIXME: the integer vmull operations could be emitted in terms of pure + // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of + // hoisting the exts outside loops. Until global ISel comes along that can + // see through such movement this leads to bad CodeGen. So we need an + // intrinsic for now. + Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls; + Int = Type.isPoly() ? 
(unsigned)Intrinsic::arm_neon_vmullp : Int; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); + case NEON::BI__builtin_neon_vpadal_v: + case NEON::BI__builtin_neon_vpadalq_v: { + // The source operand type has twice as many elements of half the size. + unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); + llvm::Type *EltTy = + llvm::IntegerType::get(getLLVMContext(), EltBits / 2); + llvm::Type *NarrowTy = + llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); + llvm::Type *Tys[2] = { Ty, NarrowTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint); + } + case NEON::BI__builtin_neon_vpaddl_v: + case NEON::BI__builtin_neon_vpaddlq_v: { + // The source operand type has twice as many elements of half the size. + unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); + llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2); + llvm::Type *NarrowTy = + llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); + llvm::Type *Tys[2] = { Ty, NarrowTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl"); + } + case NEON::BI__builtin_neon_vqdmlal_v: + case NEON::BI__builtin_neon_vqdmlsl_v: { + SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end()); + Value *Mul = EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), + MulOps, "vqdmlal"); - // Crypto - case AArch64::BI__builtin_neon_vaeseq_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aese, Ty), - Ops, "aese"); - case AArch64::BI__builtin_neon_vaesdq_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesd, Ty), - Ops, "aesd"); - case AArch64::BI__builtin_neon_vaesmcq_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesmc, Ty), - Ops, "aesmc"); - case AArch64::BI__builtin_neon_vaesimcq_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesimc, Ty), - Ops, "aesimc"); - case AArch64::BI__builtin_neon_vsha1su1q_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1su1, Ty), - Ops, "sha1su1"); - case AArch64::BI__builtin_neon_vsha256su0q_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256su0, Ty), - Ops, "sha256su0"); - case AArch64::BI__builtin_neon_vsha1su0q_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1su0, Ty), - Ops, "sha1su0"); - case AArch64::BI__builtin_neon_vsha256hq_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256h, Ty), - Ops, "sha256h"); - case AArch64::BI__builtin_neon_vsha256h2q_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256h2, Ty), - Ops, "sha256h2"); - case AArch64::BI__builtin_neon_vsha256su1q_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256su1, Ty), - Ops, "sha256su1"); - case AArch64::BI__builtin_neon_vmul_lane_v: - case AArch64::BI__builtin_neon_vmul_laneq_v: { - // v1f64 vmul_lane should be mapped to Neon scalar mul lane - bool Quad = false; - if (BuiltinID == AArch64::BI__builtin_neon_vmul_laneq_v) - Quad = true; - Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); - llvm::Type *VTy = GetNeonType(this, - NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); - Ops[1] = Builder.CreateBitCast(Ops[1], VTy); - Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); - Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); - return Builder.CreateBitCast(Result, Ty); + SmallVector<Value *, 2> AccumOps; + AccumOps.push_back(Ops[0]); + AccumOps.push_back(Mul); + return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), + AccumOps, NameHint); + } + case NEON::BI__builtin_neon_vqshl_n_v: 
+ case NEON::BI__builtin_neon_vqshlq_n_v: + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n", + 1, false); + case NEON::BI__builtin_neon_vrecpe_v: + case NEON::BI__builtin_neon_vrecpeq_v: + case NEON::BI__builtin_neon_vrsqrte_v: + case NEON::BI__builtin_neon_vrsqrteq_v: + Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint); + + case NEON::BI__builtin_neon_vshl_n_v: + case NEON::BI__builtin_neon_vshlq_n_v: + Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false); + return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], + "vshl_n"); + case NEON::BI__builtin_neon_vshll_n_v: { + llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy); + Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); + if (Usgn) + Ops[0] = Builder.CreateZExt(Ops[0], VTy); + else + Ops[0] = Builder.CreateSExt(Ops[0], VTy); + Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false); + return Builder.CreateShl(Ops[0], Ops[1], "vshll_n"); } + case NEON::BI__builtin_neon_vshrn_n_v: { + llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy); + Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); + Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false); + if (Usgn) + Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]); + else + Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]); + return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n"); + } + case NEON::BI__builtin_neon_vshr_n_v: + case NEON::BI__builtin_neon_vshrq_n_v: + return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n"); + case NEON::BI__builtin_neon_vst1_v: + case NEON::BI__builtin_neon_vst1q_v: + case NEON::BI__builtin_neon_vst2_v: + case NEON::BI__builtin_neon_vst2q_v: + case NEON::BI__builtin_neon_vst3_v: + case NEON::BI__builtin_neon_vst3q_v: + case NEON::BI__builtin_neon_vst4_v: + case NEON::BI__builtin_neon_vst4q_v: + case NEON::BI__builtin_neon_vst2_lane_v: + case NEON::BI__builtin_neon_vst2q_lane_v: + case NEON::BI__builtin_neon_vst3_lane_v: + case NEON::BI__builtin_neon_vst3q_lane_v: + case NEON::BI__builtin_neon_vst4_lane_v: + case NEON::BI__builtin_neon_vst4q_lane_v: + Ops.push_back(Align); + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, ""); + case NEON::BI__builtin_neon_vsubhn_v: { + llvm::VectorType *SrcTy = + llvm::VectorType::getExtendedElementVectorType(VTy); - // AArch64-only builtins - case AArch64::BI__builtin_neon_vfmaq_laneq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + // %sum = add <4 x i32> %lhs, %rhs + Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); + Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); + Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn"); - Ops[2] = Builder.CreateBitCast(Ops[2], Ty); - Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); - return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); + // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> + Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(), + SrcTy->getScalarSizeInBits() / 2); + ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt); + Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn"); + + // %res = trunc <4 x i32> %high to <4 x i16> + return Builder.CreateTrunc(Ops[0], VTy, "vsubhn"); } - case AArch64::BI__builtin_neon_vfmaq_lane_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + case NEON::BI__builtin_neon_vtrn_v: + case NEON::BI__builtin_neon_vtrnq_v: { + Ops[0] = 
Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + Ops[2] = Builder.CreateBitCast(Ops[2], Ty); + Value *SV = nullptr; - llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); - llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), - VTy->getNumElements() / 2); - Ops[2] = Builder.CreateBitCast(Ops[2], STy); - Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), - cast<ConstantInt>(Ops[3])); - Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); - - return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); - } - case AArch64::BI__builtin_neon_vfma_lane_v: { - llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); - // v1f64 fma should be mapped to Neon scalar f64 fma - if (VTy && VTy->getElementType() == DoubleTy) { - Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); - Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); - llvm::Type *VTy = GetNeonType(this, - NeonTypeFlags(NeonTypeFlags::Float64, false, false)); - Ops[2] = Builder.CreateBitCast(Ops[2], VTy); - Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); - Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); - Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); - return Builder.CreateBitCast(Result, Ty); + for (unsigned vi = 0; vi != 2; ++vi) { + SmallVector<Constant*, 16> Indices; + for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { + Indices.push_back(Builder.getInt32(i+vi)); + Indices.push_back(Builder.getInt32(i+e+vi)); + } + Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); + SV = llvm::ConstantVector::get(Indices); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn"); + SV = Builder.CreateStore(SV, Addr); } - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + return SV; + } + case NEON::BI__builtin_neon_vtst_v: + case NEON::BI__builtin_neon_vtstq_v: { Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - - Ops[2] = Builder.CreateBitCast(Ops[2], Ty); - Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); - return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); + Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); + Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], + ConstantAggregateZero::get(Ty)); + return Builder.CreateSExt(Ops[0], Ty, "vtst"); } - case AArch64::BI__builtin_neon_vfma_laneq_v: { - llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); - // v1f64 fma should be mapped to Neon scalar f64 fma - if (VTy && VTy->getElementType() == DoubleTy) { - Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); - Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); - llvm::Type *VTy = GetNeonType(this, - NeonTypeFlags(NeonTypeFlags::Float64, false, true)); - Ops[2] = Builder.CreateBitCast(Ops[2], VTy); - Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); - Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); - Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); - return Builder.CreateBitCast(Result, Ty); - } - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + case NEON::BI__builtin_neon_vuzp_v: + case NEON::BI__builtin_neon_vuzpq_v: { + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + Ops[2] = Builder.CreateBitCast(Ops[2], Ty); + Value *SV = nullptr; - llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), - VTy->getNumElements() * 2); - Ops[2] = Builder.CreateBitCast(Ops[2], STy); - Value* SV = 
llvm::ConstantVector::getSplat(VTy->getNumElements(), - cast<ConstantInt>(Ops[3])); - Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); + for (unsigned vi = 0; vi != 2; ++vi) { + SmallVector<Constant*, 16> Indices; + for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) + Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi)); - return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); + Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); + SV = llvm::ConstantVector::get(Indices); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp"); + SV = Builder.CreateStore(SV, Addr); + } + return SV; } - case AArch64::BI__builtin_neon_vfms_v: - case AArch64::BI__builtin_neon_vfmsq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + case NEON::BI__builtin_neon_vzip_v: + case NEON::BI__builtin_neon_vzipq_v: { + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Ops[1] = Builder.CreateFNeg(Ops[1]); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); + Value *SV = nullptr; - // LLVM's fma intrinsic puts the accumulator in the last position, but the - // AArch64 intrinsic has it first. - return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); - } - case AArch64::BI__builtin_neon_vmaxnm_v: - case AArch64::BI__builtin_neon_vmaxnmq_v: { - Int = Intrinsic::aarch64_neon_vmaxnm; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); - } - case AArch64::BI__builtin_neon_vminnm_v: - case AArch64::BI__builtin_neon_vminnmq_v: { - Int = Intrinsic::aarch64_neon_vminnm; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); - } - case AArch64::BI__builtin_neon_vpmaxnm_v: - case AArch64::BI__builtin_neon_vpmaxnmq_v: { - Int = Intrinsic::aarch64_neon_vpmaxnm; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); - } - case AArch64::BI__builtin_neon_vpminnm_v: - case AArch64::BI__builtin_neon_vpminnmq_v: { - Int = Intrinsic::aarch64_neon_vpminnm; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); - } - case AArch64::BI__builtin_neon_vpmaxq_v: { - Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); - } - case AArch64::BI__builtin_neon_vpminq_v: { - Int = usgn ? 
Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); - } - case AArch64::BI__builtin_neon_vpaddq_v: { - Int = Intrinsic::arm_neon_vpadd; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpadd"); - } - case AArch64::BI__builtin_neon_vmulx_v: - case AArch64::BI__builtin_neon_vmulxq_v: { - Int = Intrinsic::aarch64_neon_vmulx; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); - } - case AArch64::BI__builtin_neon_vpaddl_v: - case AArch64::BI__builtin_neon_vpaddlq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpaddl_v, E); - case AArch64::BI__builtin_neon_vpadal_v: - case AArch64::BI__builtin_neon_vpadalq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpadal_v, E); - case AArch64::BI__builtin_neon_vqabs_v: - case AArch64::BI__builtin_neon_vqabsq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqabs_v, E); - case AArch64::BI__builtin_neon_vqneg_v: - case AArch64::BI__builtin_neon_vqnegq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqneg_v, E); - case AArch64::BI__builtin_neon_vabs_v: - case AArch64::BI__builtin_neon_vabsq_v: { - if (VTy->getElementType()->isFloatingPointTy()) { - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); + for (unsigned vi = 0; vi != 2; ++vi) { + SmallVector<Constant*, 16> Indices; + for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { + Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1)); + Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e)); + } + Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); + SV = llvm::ConstantVector::get(Indices); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip"); + SV = Builder.CreateStore(SV, Addr); } - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabs_v, E); - } - case AArch64::BI__builtin_neon_vsqadd_v: - case AArch64::BI__builtin_neon_vsqaddq_v: { - Int = Intrinsic::aarch64_neon_usqadd; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); - } - case AArch64::BI__builtin_neon_vuqadd_v: - case AArch64::BI__builtin_neon_vuqaddq_v: { - Int = Intrinsic::aarch64_neon_suqadd; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); - } - case AArch64::BI__builtin_neon_vcls_v: - case AArch64::BI__builtin_neon_vclsq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcls_v, E); - case AArch64::BI__builtin_neon_vclz_v: - case AArch64::BI__builtin_neon_vclzq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vclz_v, E); - case AArch64::BI__builtin_neon_vcnt_v: - case AArch64::BI__builtin_neon_vcntq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcnt_v, E); - case AArch64::BI__builtin_neon_vrbit_v: - case AArch64::BI__builtin_neon_vrbitq_v: - Int = Intrinsic::aarch64_neon_rbit; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); - case AArch64::BI__builtin_neon_vmovn_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmovn_v, E); - case AArch64::BI__builtin_neon_vqmovun_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqmovun_v, E); - case AArch64::BI__builtin_neon_vqmovn_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqmovn_v, E); - case AArch64::BI__builtin_neon_vcvt_f16_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_f16_v, E); - case AArch64::BI__builtin_neon_vcvt_f32_f16: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_f32_f16, E); - case AArch64::BI__builtin_neon_vcvt_f32_f64: { - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ty = GetNeonType(this, 
NeonTypeFlags(NeonTypeFlags::Float32, false, false)); - return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); - } - case AArch64::BI__builtin_neon_vcvtx_f32_v: { - llvm::Type *EltTy = FloatTy; - llvm::Type *ResTy = llvm::VectorType::get(EltTy, 2); - llvm::Type *Tys[2] = { ResTy, Ty }; - Int = Intrinsic::aarch64_neon_fcvtxn; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtx_f32_f64"); - } - case AArch64::BI__builtin_neon_vcvt_f64_f32: { - llvm::Type *OpTy = - GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, false)); - Ops[0] = Builder.CreateBitCast(Ops[0], OpTy); - return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); - } - case AArch64::BI__builtin_neon_vcvt_f64_v: - case AArch64::BI__builtin_neon_vcvtq_f64_v: { - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); - return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") - : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); - } - case AArch64::BI__builtin_neon_vrndn_v: - case AArch64::BI__builtin_neon_vrndnq_v: { - Int = Intrinsic::aarch64_neon_frintn; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); - } - case AArch64::BI__builtin_neon_vrnda_v: - case AArch64::BI__builtin_neon_vrndaq_v: { - Int = Intrinsic::round; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); - } - case AArch64::BI__builtin_neon_vrndp_v: - case AArch64::BI__builtin_neon_vrndpq_v: { - Int = Intrinsic::ceil; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); - } - case AArch64::BI__builtin_neon_vrndm_v: - case AArch64::BI__builtin_neon_vrndmq_v: { - Int = Intrinsic::floor; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); - } - case AArch64::BI__builtin_neon_vrndx_v: - case AArch64::BI__builtin_neon_vrndxq_v: { - Int = Intrinsic::rint; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); + return SV; } - case AArch64::BI__builtin_neon_vrnd_v: - case AArch64::BI__builtin_neon_vrndq_v: { - Int = Intrinsic::trunc; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd"); } - case AArch64::BI__builtin_neon_vrndi_v: - case AArch64::BI__builtin_neon_vrndiq_v: { - Int = Intrinsic::nearbyint; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); + + assert(Int && "Expected valid intrinsic number"); + + // Determine the type(s) of this overloaded AArch64 intrinsic. + Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E); + + Value *Result = EmitNeonCall(F, Ops, NameHint); + llvm::Type *ResultType = ConvertType(E->getType()); + // AArch64 intrinsic one-element vector type cast to + // scalar type expected by the builtin + return Builder.CreateBitCast(Result, ResultType, NameHint); +} + +Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( + Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, + const CmpInst::Predicate Ip, const Twine &Name) { + llvm::Type *OTy = Op->getType(); + + // FIXME: this is utterly horrific. We should not be looking at previous + // codegen context to find out what needs doing. Unfortunately TableGen + // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32 + // (etc). 
+ if (BitCastInst *BI = dyn_cast<BitCastInst>(Op)) + OTy = BI->getOperand(0)->getType(); + + Op = Builder.CreateBitCast(Op, OTy); + if (OTy->getScalarType()->isFloatingPointTy()) { + Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); + } else { + Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy)); } - case AArch64::BI__builtin_neon_vcvt_s32_v: - case AArch64::BI__builtin_neon_vcvt_u32_v: - case AArch64::BI__builtin_neon_vcvtq_s32_v: - case AArch64::BI__builtin_neon_vcvtq_u32_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_u32_v, E); - case AArch64::BI__builtin_neon_vcvt_s64_v: - case AArch64::BI__builtin_neon_vcvt_u64_v: - case AArch64::BI__builtin_neon_vcvtq_s64_v: - case AArch64::BI__builtin_neon_vcvtq_u64_v: { - llvm::Type *DoubleTy = - GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); - Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); - return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt") - : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); + return Builder.CreateSExt(Op, Ty, Name); +} + +static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, + Value *ExtOp, Value *IndexOp, + llvm::Type *ResTy, unsigned IntID, + const char *Name) { + SmallVector<Value *, 2> TblOps; + if (ExtOp) + TblOps.push_back(ExtOp); + + // Build a vector containing sequential number like (0, 1, 2, ..., 15) + SmallVector<Constant*, 16> Indices; + llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); + for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { + Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i)); + Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1)); } - case AArch64::BI__builtin_neon_vcvtn_s32_v: - case AArch64::BI__builtin_neon_vcvtnq_s32_v: { - llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements()); - llvm::Type *Tys[2] = { Ty, OpTy }; - Int = Intrinsic::aarch64_neon_fcvtns; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtns_f32"); - } - case AArch64::BI__builtin_neon_vcvtn_s64_v: - case AArch64::BI__builtin_neon_vcvtnq_s64_v: { - llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements()); - llvm::Type *Tys[2] = { Ty, OpTy }; - Int = Intrinsic::aarch64_neon_fcvtns; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtns_f64"); - } - case AArch64::BI__builtin_neon_vcvtn_u32_v: - case AArch64::BI__builtin_neon_vcvtnq_u32_v: { - llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements()); - llvm::Type *Tys[2] = { Ty, OpTy }; - Int = Intrinsic::aarch64_neon_fcvtnu; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtnu_f32"); - } - case AArch64::BI__builtin_neon_vcvtn_u64_v: - case AArch64::BI__builtin_neon_vcvtnq_u64_v: { - llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements()); - llvm::Type *Tys[2] = { Ty, OpTy }; - Int = Intrinsic::aarch64_neon_fcvtnu; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtnu_f64"); - } - case AArch64::BI__builtin_neon_vcvtp_s32_v: - case AArch64::BI__builtin_neon_vcvtpq_s32_v: { - llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements()); - llvm::Type *Tys[2] = { Ty, OpTy }; - Int = Intrinsic::aarch64_neon_fcvtps; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtps_f32"); - } - case AArch64::BI__builtin_neon_vcvtp_s64_v: - case AArch64::BI__builtin_neon_vcvtpq_s64_v: { - llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements()); - llvm::Type *Tys[2] = { Ty, OpTy }; - Int = Intrinsic::aarch64_neon_fcvtps; - return 
EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtps_f64"); - } - case AArch64::BI__builtin_neon_vcvtp_u32_v: - case AArch64::BI__builtin_neon_vcvtpq_u32_v: { - llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements()); - llvm::Type *Tys[2] = { Ty, OpTy }; - Int = Intrinsic::aarch64_neon_fcvtpu; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtpu_f32"); - } - case AArch64::BI__builtin_neon_vcvtp_u64_v: - case AArch64::BI__builtin_neon_vcvtpq_u64_v: { - llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements()); - llvm::Type *Tys[2] = { Ty, OpTy }; - Int = Intrinsic::aarch64_neon_fcvtpu; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtpu_f64"); - } - case AArch64::BI__builtin_neon_vcvtm_s32_v: - case AArch64::BI__builtin_neon_vcvtmq_s32_v: { - llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements()); - llvm::Type *Tys[2] = { Ty, OpTy }; - Int = Intrinsic::aarch64_neon_fcvtms; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtms_f32"); - } - case AArch64::BI__builtin_neon_vcvtm_s64_v: - case AArch64::BI__builtin_neon_vcvtmq_s64_v: { - llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements()); - llvm::Type *Tys[2] = { Ty, OpTy }; - Int = Intrinsic::aarch64_neon_fcvtms; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtms_f64"); - } - case AArch64::BI__builtin_neon_vcvtm_u32_v: - case AArch64::BI__builtin_neon_vcvtmq_u32_v: { - llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements()); - llvm::Type *Tys[2] = { Ty, OpTy }; - Int = Intrinsic::aarch64_neon_fcvtmu; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtmu_f32"); - } - case AArch64::BI__builtin_neon_vcvtm_u64_v: - case AArch64::BI__builtin_neon_vcvtmq_u64_v: { - llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements()); - llvm::Type *Tys[2] = { Ty, OpTy }; - Int = Intrinsic::aarch64_neon_fcvtmu; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtmu_f64"); - } - case AArch64::BI__builtin_neon_vcvta_s32_v: - case AArch64::BI__builtin_neon_vcvtaq_s32_v: { - llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements()); - llvm::Type *Tys[2] = { Ty, OpTy }; - Int = Intrinsic::aarch64_neon_fcvtas; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtas_f32"); - } - case AArch64::BI__builtin_neon_vcvta_s64_v: - case AArch64::BI__builtin_neon_vcvtaq_s64_v: { - llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements()); - llvm::Type *Tys[2] = { Ty, OpTy }; - Int = Intrinsic::aarch64_neon_fcvtas; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtas_f64"); - } - case AArch64::BI__builtin_neon_vcvta_u32_v: - case AArch64::BI__builtin_neon_vcvtaq_u32_v: { - llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements()); - llvm::Type *Tys[2] = { Ty, OpTy }; - Int = Intrinsic::aarch64_neon_fcvtau; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtau_f32"); - } - case AArch64::BI__builtin_neon_vcvta_u64_v: - case AArch64::BI__builtin_neon_vcvtaq_u64_v: { - llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements()); - llvm::Type *Tys[2] = { Ty, OpTy }; - Int = Intrinsic::aarch64_neon_fcvtau; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtau_f64"); - } - case AArch64::BI__builtin_neon_vrecpe_v: - case AArch64::BI__builtin_neon_vrecpeq_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecpe_v, E); - case AArch64::BI__builtin_neon_vrsqrte_v: - case AArch64::BI__builtin_neon_vrsqrteq_v: - return 
EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrte_v, E); - case AArch64::BI__builtin_neon_vsqrt_v: - case AArch64::BI__builtin_neon_vsqrtq_v: { - Int = Intrinsic::sqrt; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); + Value *SV = llvm::ConstantVector::get(Indices); + + int PairPos = 0, End = Ops.size() - 1; + while (PairPos < End) { + TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], + Ops[PairPos+1], SV, Name)); + PairPos += 2; } - case AArch64::BI__builtin_neon_vcvt_f32_v: - case AArch64::BI__builtin_neon_vcvtq_f32_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_f32_v, E); - case AArch64::BI__builtin_neon_vceqz_v: - case AArch64::BI__builtin_neon_vceqzq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, - ICmpInst::ICMP_EQ, "vceqz"); - case AArch64::BI__builtin_neon_vcgez_v: - case AArch64::BI__builtin_neon_vcgezq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, - ICmpInst::ICMP_SGE, "vcgez"); - case AArch64::BI__builtin_neon_vclez_v: - case AArch64::BI__builtin_neon_vclezq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, - ICmpInst::ICMP_SLE, "vclez"); - case AArch64::BI__builtin_neon_vcgtz_v: - case AArch64::BI__builtin_neon_vcgtzq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, - ICmpInst::ICMP_SGT, "vcgtz"); - case AArch64::BI__builtin_neon_vcltz_v: - case AArch64::BI__builtin_neon_vcltzq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, - ICmpInst::ICMP_SLT, "vcltz"); + + // If there's an odd number of 64-bit lookup table, fill the high 64-bit + // of the 128-bit lookup table with zero. + if (PairPos == End) { + Value *ZeroTbl = ConstantAggregateZero::get(TblTy); + TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], + ZeroTbl, SV, Name)); } + + Function *TblF; + TblOps.push_back(IndexOp); + TblF = CGF.CGM.getIntrinsic(IntID, ResTy); + + return CGF.EmitNeonCall(TblF, TblOps, Name); } Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { + unsigned HintID = static_cast<unsigned>(-1); + switch (BuiltinID) { + default: break; + case ARM::BI__builtin_arm_nop: + HintID = 0; + break; + case ARM::BI__builtin_arm_yield: + case ARM::BI__yield: + HintID = 1; + break; + case ARM::BI__builtin_arm_wfe: + case ARM::BI__wfe: + HintID = 2; + break; + case ARM::BI__builtin_arm_wfi: + case ARM::BI__wfi: + HintID = 3; + break; + case ARM::BI__builtin_arm_sev: + case ARM::BI__sev: + HintID = 4; + break; + case ARM::BI__builtin_arm_sevl: + case ARM::BI__sevl: + HintID = 5; + break; + } + + if (HintID != static_cast<unsigned>(-1)) { + Function *F = CGM.getIntrinsic(Intrinsic::arm_hint); + return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); + } + + if (BuiltinID == ARM::BI__builtin_arm_rbit) { + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit), + EmitScalarExpr(E->getArg(0)), + "rbit"); + } + if (BuiltinID == ARM::BI__clear_cache) { assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); const FunctionDecl *FD = E->getDirectCallee(); @@ -3938,9 +3093,23 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } if (BuiltinID == ARM::BI__builtin_arm_ldrexd || - (BuiltinID == ARM::BI__builtin_arm_ldrex && - getContext().getTypeSize(E->getType()) == 64)) { - Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); + ((BuiltinID == ARM::BI__builtin_arm_ldrex || + BuiltinID == ARM::BI__builtin_arm_ldaex) && + getContext().getTypeSize(E->getType()) == 64) || + 
BuiltinID == ARM::BI__ldrexd) { + Function *F; + + switch (BuiltinID) { + default: llvm_unreachable("unexpected builtin"); + case ARM::BI__builtin_arm_ldaex: + F = CGM.getIntrinsic(Intrinsic::arm_ldaexd); + break; + case ARM::BI__builtin_arm_ldrexd: + case ARM::BI__builtin_arm_ldrex: + case ARM::BI__ldrexd: + F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); + break; + } Value *LdPtr = EmitScalarExpr(E->getArg(0)); Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), @@ -3957,7 +3126,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return Builder.CreateBitCast(Val, ConvertType(E->getType())); } - if (BuiltinID == ARM::BI__builtin_arm_ldrex) { + if (BuiltinID == ARM::BI__builtin_arm_ldrex || + BuiltinID == ARM::BI__builtin_arm_ldaex) { Value *LoadAddr = EmitScalarExpr(E->getArg(0)); QualType Ty = E->getType(); @@ -3966,7 +3136,10 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, getContext().getTypeSize(Ty)); LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo()); - Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrex, LoadAddr->getType()); + Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex + ? Intrinsic::arm_ldaex + : Intrinsic::arm_ldrex, + LoadAddr->getType()); Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); if (RealResTy->isPointerTy()) @@ -3978,9 +3151,12 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } if (BuiltinID == ARM::BI__builtin_arm_strexd || - (BuiltinID == ARM::BI__builtin_arm_strex && + ((BuiltinID == ARM::BI__builtin_arm_stlex || + BuiltinID == ARM::BI__builtin_arm_strex) && getContext().getTypeSize(E->getArg(0)->getType()) == 64)) { - Function *F = CGM.getIntrinsic(Intrinsic::arm_strexd); + Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex + ? Intrinsic::arm_stlexd + : Intrinsic::arm_strexd); llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, NULL); Value *Tmp = CreateMemTemp(E->getArg(0)->getType()); @@ -3996,7 +3172,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd"); } - if (BuiltinID == ARM::BI__builtin_arm_strex) { + if (BuiltinID == ARM::BI__builtin_arm_strex || + BuiltinID == ARM::BI__builtin_arm_stlex) { Value *StoreVal = EmitScalarExpr(E->getArg(0)); Value *StoreAddr = EmitScalarExpr(E->getArg(1)); @@ -4012,7 +3189,10 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty); } - Function *F = CGM.getIntrinsic(Intrinsic::arm_strex, StoreAddr->getType()); + Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex + ? 
Intrinsic::arm_stlex + : Intrinsic::arm_strex, + StoreAddr->getType()); return Builder.CreateCall2(F, StoreVal, StoreAddr, "strex"); } @@ -4021,11 +3201,6 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F); } - if (BuiltinID == ARM::BI__builtin_arm_sevl) { - Function *F = CGM.getIntrinsic(Intrinsic::arm_sevl); - return Builder.CreateCall(F); - } - // CRC32 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; switch (BuiltinID) { @@ -4070,32 +3245,32 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } SmallVector<Value*, 4> Ops; - llvm::Value *Align = 0; + llvm::Value *Align = nullptr; for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { if (i == 0) { switch (BuiltinID) { - case ARM::BI__builtin_neon_vld1_v: - case ARM::BI__builtin_neon_vld1q_v: - case ARM::BI__builtin_neon_vld1q_lane_v: - case ARM::BI__builtin_neon_vld1_lane_v: - case ARM::BI__builtin_neon_vld1_dup_v: - case ARM::BI__builtin_neon_vld1q_dup_v: - case ARM::BI__builtin_neon_vst1_v: - case ARM::BI__builtin_neon_vst1q_v: - case ARM::BI__builtin_neon_vst1q_lane_v: - case ARM::BI__builtin_neon_vst1_lane_v: - case ARM::BI__builtin_neon_vst2_v: - case ARM::BI__builtin_neon_vst2q_v: - case ARM::BI__builtin_neon_vst2_lane_v: - case ARM::BI__builtin_neon_vst2q_lane_v: - case ARM::BI__builtin_neon_vst3_v: - case ARM::BI__builtin_neon_vst3q_v: - case ARM::BI__builtin_neon_vst3_lane_v: - case ARM::BI__builtin_neon_vst3q_lane_v: - case ARM::BI__builtin_neon_vst4_v: - case ARM::BI__builtin_neon_vst4q_v: - case ARM::BI__builtin_neon_vst4_lane_v: - case ARM::BI__builtin_neon_vst4q_lane_v: + case NEON::BI__builtin_neon_vld1_v: + case NEON::BI__builtin_neon_vld1q_v: + case NEON::BI__builtin_neon_vld1q_lane_v: + case NEON::BI__builtin_neon_vld1_lane_v: + case NEON::BI__builtin_neon_vld1_dup_v: + case NEON::BI__builtin_neon_vld1q_dup_v: + case NEON::BI__builtin_neon_vst1_v: + case NEON::BI__builtin_neon_vst1q_v: + case NEON::BI__builtin_neon_vst1q_lane_v: + case NEON::BI__builtin_neon_vst1_lane_v: + case NEON::BI__builtin_neon_vst2_v: + case NEON::BI__builtin_neon_vst2q_v: + case NEON::BI__builtin_neon_vst2_lane_v: + case NEON::BI__builtin_neon_vst2q_lane_v: + case NEON::BI__builtin_neon_vst3_v: + case NEON::BI__builtin_neon_vst3q_v: + case NEON::BI__builtin_neon_vst3_lane_v: + case NEON::BI__builtin_neon_vst3q_lane_v: + case NEON::BI__builtin_neon_vst4_v: + case NEON::BI__builtin_neon_vst4q_v: + case NEON::BI__builtin_neon_vst4_lane_v: + case NEON::BI__builtin_neon_vst4q_lane_v: // Get the alignment for the argument in addition to the value; // we'll use it later. 
std::pair<llvm::Value*, unsigned> Src = @@ -4107,21 +3282,21 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } if (i == 1) { switch (BuiltinID) { - case ARM::BI__builtin_neon_vld2_v: - case ARM::BI__builtin_neon_vld2q_v: - case ARM::BI__builtin_neon_vld3_v: - case ARM::BI__builtin_neon_vld3q_v: - case ARM::BI__builtin_neon_vld4_v: - case ARM::BI__builtin_neon_vld4q_v: - case ARM::BI__builtin_neon_vld2_lane_v: - case ARM::BI__builtin_neon_vld2q_lane_v: - case ARM::BI__builtin_neon_vld3_lane_v: - case ARM::BI__builtin_neon_vld3q_lane_v: - case ARM::BI__builtin_neon_vld4_lane_v: - case ARM::BI__builtin_neon_vld4q_lane_v: - case ARM::BI__builtin_neon_vld2_dup_v: - case ARM::BI__builtin_neon_vld3_dup_v: - case ARM::BI__builtin_neon_vld4_dup_v: + case NEON::BI__builtin_neon_vld2_v: + case NEON::BI__builtin_neon_vld2q_v: + case NEON::BI__builtin_neon_vld3_v: + case NEON::BI__builtin_neon_vld3q_v: + case NEON::BI__builtin_neon_vld4_v: + case NEON::BI__builtin_neon_vld4q_v: + case NEON::BI__builtin_neon_vld2_lane_v: + case NEON::BI__builtin_neon_vld2q_lane_v: + case NEON::BI__builtin_neon_vld3_lane_v: + case NEON::BI__builtin_neon_vld3q_lane_v: + case NEON::BI__builtin_neon_vld4_lane_v: + case NEON::BI__builtin_neon_vld4q_lane_v: + case NEON::BI__builtin_neon_vld2_dup_v: + case NEON::BI__builtin_neon_vld3_dup_v: + case NEON::BI__builtin_neon_vld4_dup_v: // Get the alignment for the argument in addition to the value; // we'll use it later. std::pair<llvm::Value*, unsigned> Src = @@ -4134,41 +3309,59 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Ops.push_back(EmitScalarExpr(E->getArg(i))); } - // vget_lane and vset_lane are not overloaded and do not have an extra - // argument that specifies the vector type. switch (BuiltinID) { default: break; - case ARM::BI__builtin_neon_vget_lane_i8: - case ARM::BI__builtin_neon_vget_lane_i16: - case ARM::BI__builtin_neon_vget_lane_i32: - case ARM::BI__builtin_neon_vget_lane_i64: - case ARM::BI__builtin_neon_vget_lane_f32: - case ARM::BI__builtin_neon_vgetq_lane_i8: - case ARM::BI__builtin_neon_vgetq_lane_i16: - case ARM::BI__builtin_neon_vgetq_lane_i32: - case ARM::BI__builtin_neon_vgetq_lane_i64: - case ARM::BI__builtin_neon_vgetq_lane_f32: + // vget_lane and vset_lane are not overloaded and do not have an extra + // argument that specifies the vector type. 
+ case NEON::BI__builtin_neon_vget_lane_i8: + case NEON::BI__builtin_neon_vget_lane_i16: + case NEON::BI__builtin_neon_vget_lane_i32: + case NEON::BI__builtin_neon_vget_lane_i64: + case NEON::BI__builtin_neon_vget_lane_f32: + case NEON::BI__builtin_neon_vgetq_lane_i8: + case NEON::BI__builtin_neon_vgetq_lane_i16: + case NEON::BI__builtin_neon_vgetq_lane_i32: + case NEON::BI__builtin_neon_vgetq_lane_i64: + case NEON::BI__builtin_neon_vgetq_lane_f32: return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); - case ARM::BI__builtin_neon_vset_lane_i8: - case ARM::BI__builtin_neon_vset_lane_i16: - case ARM::BI__builtin_neon_vset_lane_i32: - case ARM::BI__builtin_neon_vset_lane_i64: - case ARM::BI__builtin_neon_vset_lane_f32: - case ARM::BI__builtin_neon_vsetq_lane_i8: - case ARM::BI__builtin_neon_vsetq_lane_i16: - case ARM::BI__builtin_neon_vsetq_lane_i32: - case ARM::BI__builtin_neon_vsetq_lane_i64: - case ARM::BI__builtin_neon_vsetq_lane_f32: + case NEON::BI__builtin_neon_vset_lane_i8: + case NEON::BI__builtin_neon_vset_lane_i16: + case NEON::BI__builtin_neon_vset_lane_i32: + case NEON::BI__builtin_neon_vset_lane_i64: + case NEON::BI__builtin_neon_vset_lane_f32: + case NEON::BI__builtin_neon_vsetq_lane_i8: + case NEON::BI__builtin_neon_vsetq_lane_i16: + case NEON::BI__builtin_neon_vsetq_lane_i32: + case NEON::BI__builtin_neon_vsetq_lane_i64: + case NEON::BI__builtin_neon_vsetq_lane_f32: Ops.push_back(EmitScalarExpr(E->getArg(2))); return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); + + // Non-polymorphic crypto instructions also not overloaded + case NEON::BI__builtin_neon_vsha1h_u32: + Ops.push_back(EmitScalarExpr(E->getArg(0))); + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops, + "vsha1h"); + case NEON::BI__builtin_neon_vsha1cq_u32: + Ops.push_back(EmitScalarExpr(E->getArg(2))); + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops, + "vsha1h"); + case NEON::BI__builtin_neon_vsha1pq_u32: + Ops.push_back(EmitScalarExpr(E->getArg(2))); + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops, + "vsha1h"); + case NEON::BI__builtin_neon_vsha1mq_u32: + Ops.push_back(EmitScalarExpr(E->getArg(2))); + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops, + "vsha1h"); } // Get the last argument, which specifies the vector type. llvm::APSInt Result; const Expr *Arg = E->getArg(E->getNumArgs()-1); if (!Arg->isIntegerConstantExpr(Result, getContext())) - return 0; + return nullptr; if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || BuiltinID == ARM::BI__builtin_arm_vcvtr_d) { @@ -4191,166 +3384,27 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // Determine the type of this overloaded NEON intrinsic. NeonTypeFlags Type(Result.getZExtValue()); bool usgn = Type.isUnsigned(); - bool quad = Type.isQuad(); bool rightShift = false; llvm::VectorType *VTy = GetNeonType(this, Type); llvm::Type *Ty = VTy; if (!Ty) - return 0; + return nullptr; + + // Many NEON builtins have identical semantics and uses in ARM and + // AArch64. Emit these in a single function. 
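The table lookup that follows replaces the long per-builtin switch removed by this change. A rough sketch of that pattern, assuming a table sorted by builtin ID; the struct here only approximates the real NeonIntrinsicInfo, whose fields (BuiltinID, LLVMIntrinsic, AltLLVMIntrinsic, NameHint, TypeModifier) are visible at the call site:

#include <algorithm>

struct IntrinsicEntry {
  unsigned BuiltinID;          // key the table is sorted on
  unsigned LLVMIntrinsic;
  const char *NameHint;
  bool operator<(unsigned RHSID) const { return BuiltinID < RHSID; }
};

const IntrinsicEntry *lookup(const IntrinsicEntry *Begin,
                             const IntrinsicEntry *End, unsigned ID) {
  const IntrinsicEntry *It = std::lower_bound(Begin, End, ID);
  return (It != End && It->BuiltinID == ID) ? It : nullptr;
}

One binary search over a shared table then serves the builtins whose ARM and AArch64 handling is identical, which is why so many individual cases disappear below.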
+ ArrayRef<NeonIntrinsicInfo> IntrinsicMap(ARMSIMDIntrinsicMap); + const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( + IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted); + if (Builtin) + return EmitCommonNeonBuiltinExpr( + Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, + Builtin->NameHint, Builtin->TypeModifier, E, Ops, Align); unsigned Int; switch (BuiltinID) { - default: return 0; - case ARM::BI__builtin_neon_vbsl_v: - case ARM::BI__builtin_neon_vbslq_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty), - Ops, "vbsl"); - case ARM::BI__builtin_neon_vabd_v: - case ARM::BI__builtin_neon_vabdq_v: - Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); - case ARM::BI__builtin_neon_vabs_v: - case ARM::BI__builtin_neon_vabsq_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, Ty), - Ops, "vabs"); - case ARM::BI__builtin_neon_vaddhn_v: { - llvm::VectorType *SrcTy = - llvm::VectorType::getExtendedElementVectorType(VTy); - - // %sum = add <4 x i32> %lhs, %rhs - Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); - Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); - Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn"); - - // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> - Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(), - SrcTy->getScalarSizeInBits() / 2); - ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt); - Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn"); - - // %res = trunc <4 x i32> %high to <4 x i16> - return Builder.CreateTrunc(Ops[0], VTy, "vaddhn"); - } - case ARM::BI__builtin_neon_vcale_v: - std::swap(Ops[0], Ops[1]); - case ARM::BI__builtin_neon_vcage_v: { - Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged); - return EmitNeonCall(F, Ops, "vcage"); - } - case ARM::BI__builtin_neon_vcaleq_v: - std::swap(Ops[0], Ops[1]); - case ARM::BI__builtin_neon_vcageq_v: { - Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq); - return EmitNeonCall(F, Ops, "vcage"); - } - case ARM::BI__builtin_neon_vcalt_v: - std::swap(Ops[0], Ops[1]); - case ARM::BI__builtin_neon_vcagt_v: { - Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd); - return EmitNeonCall(F, Ops, "vcagt"); - } - case ARM::BI__builtin_neon_vcaltq_v: - std::swap(Ops[0], Ops[1]); - case ARM::BI__builtin_neon_vcagtq_v: { - Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq); - return EmitNeonCall(F, Ops, "vcagt"); - } - case ARM::BI__builtin_neon_vcls_v: - case ARM::BI__builtin_neon_vclsq_v: { - Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, Ty); - return EmitNeonCall(F, Ops, "vcls"); - } - case ARM::BI__builtin_neon_vclz_v: - case ARM::BI__builtin_neon_vclzq_v: { - // Generate target-independent intrinsic; also need to add second argument - // for whether or not clz of zero is undefined; on ARM it isn't. 
- Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ty); - Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef())); - return EmitNeonCall(F, Ops, "vclz"); - } - case ARM::BI__builtin_neon_vcnt_v: - case ARM::BI__builtin_neon_vcntq_v: { - // generate target-independent intrinsic - Function *F = CGM.getIntrinsic(Intrinsic::ctpop, Ty); - return EmitNeonCall(F, Ops, "vctpop"); - } - case ARM::BI__builtin_neon_vcvt_f16_v: { - assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad && - "unexpected vcvt_f16_v builtin"); - Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf); - return EmitNeonCall(F, Ops, "vcvt"); - } - case ARM::BI__builtin_neon_vcvt_f32_f16: { - assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad && - "unexpected vcvt_f32_f16 builtin"); - Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp); - return EmitNeonCall(F, Ops, "vcvt"); - } - case ARM::BI__builtin_neon_vcvt_f32_v: - case ARM::BI__builtin_neon_vcvtq_f32_v: - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad)); - return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") - : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); - case ARM::BI__builtin_neon_vcvt_s32_v: - case ARM::BI__builtin_neon_vcvt_u32_v: - case ARM::BI__builtin_neon_vcvtq_s32_v: - case ARM::BI__builtin_neon_vcvtq_u32_v: { - llvm::Type *FloatTy = - GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad)); - Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); - return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt") - : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); - } - case ARM::BI__builtin_neon_vcvt_n_f32_v: - case ARM::BI__builtin_neon_vcvtq_n_f32_v: { - llvm::Type *FloatTy = - GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad)); - llvm::Type *Tys[2] = { FloatTy, Ty }; - Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp - : Intrinsic::arm_neon_vcvtfxs2fp; - Function *F = CGM.getIntrinsic(Int, Tys); - return EmitNeonCall(F, Ops, "vcvt_n"); - } - case ARM::BI__builtin_neon_vcvt_n_s32_v: - case ARM::BI__builtin_neon_vcvt_n_u32_v: - case ARM::BI__builtin_neon_vcvtq_n_s32_v: - case ARM::BI__builtin_neon_vcvtq_n_u32_v: { - llvm::Type *FloatTy = - GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad)); - llvm::Type *Tys[2] = { Ty, FloatTy }; - Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu - : Intrinsic::arm_neon_vcvtfp2fxs; - Function *F = CGM.getIntrinsic(Int, Tys); - return EmitNeonCall(F, Ops, "vcvt_n"); - } - case ARM::BI__builtin_neon_vext_v: - case ARM::BI__builtin_neon_vextq_v: { - int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); - SmallVector<Constant*, 16> Indices; - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) - Indices.push_back(ConstantInt::get(Int32Ty, i+CV)); - - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Value *SV = llvm::ConstantVector::get(Indices); - return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext"); - } - case ARM::BI__builtin_neon_vhadd_v: - case ARM::BI__builtin_neon_vhaddq_v: - Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhadd"); - case ARM::BI__builtin_neon_vhsub_v: - case ARM::BI__builtin_neon_vhsubq_v: - Int = usgn ? 
Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhsub"); - case ARM::BI__builtin_neon_vld1_v: - case ARM::BI__builtin_neon_vld1q_v: - Ops.push_back(Align); - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty), - Ops, "vld1"); - case ARM::BI__builtin_neon_vld1q_lane_v: + default: return nullptr; + case NEON::BI__builtin_neon_vld1q_lane_v: // Handle 64-bit integer elements as a special case. Use shuffles of // one-element vectors to avoid poor code for i64 in the backend. if (VTy->getElementType()->isIntegerTy(64)) { @@ -4371,7 +3425,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane"); } // fall through - case ARM::BI__builtin_neon_vld1_lane_v: { + case NEON::BI__builtin_neon_vld1_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ty = llvm::PointerType::getUnqual(VTy->getElementType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); @@ -4379,90 +3433,19 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); } - case ARM::BI__builtin_neon_vld1_dup_v: - case ARM::BI__builtin_neon_vld1q_dup_v: { - Value *V = UndefValue::get(Ty); - Ty = llvm::PointerType::getUnqual(VTy->getElementType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - LoadInst *Ld = Builder.CreateLoad(Ops[0]); - Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); - llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); - Ops[0] = Builder.CreateInsertElement(V, Ld, CI); - return EmitNeonSplat(Ops[0], CI); - } - case ARM::BI__builtin_neon_vld2_v: - case ARM::BI__builtin_neon_vld2q_v: { - Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, Ty); - Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); - } - case ARM::BI__builtin_neon_vld3_v: - case ARM::BI__builtin_neon_vld3q_v: { - Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, Ty); - Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); - } - case ARM::BI__builtin_neon_vld4_v: - case ARM::BI__builtin_neon_vld4q_v: { - Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, Ty); - Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); - } - case ARM::BI__builtin_neon_vld2_lane_v: - case ARM::BI__builtin_neon_vld2q_lane_v: { - Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, Ty); - Ops[2] = Builder.CreateBitCast(Ops[2], Ty); - Ops[3] = Builder.CreateBitCast(Ops[3], Ty); - Ops.push_back(Align); - Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); - } - case ARM::BI__builtin_neon_vld3_lane_v: - case ARM::BI__builtin_neon_vld3q_lane_v: { - Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, Ty); - Ops[2] = Builder.CreateBitCast(Ops[2], Ty); - Ops[3] = Builder.CreateBitCast(Ops[3], Ty); - Ops[4] = Builder.CreateBitCast(Ops[4], Ty); - 
Ops.push_back(Align); - Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); - } - case ARM::BI__builtin_neon_vld4_lane_v: - case ARM::BI__builtin_neon_vld4q_lane_v: { - Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, Ty); - Ops[2] = Builder.CreateBitCast(Ops[2], Ty); - Ops[3] = Builder.CreateBitCast(Ops[3], Ty); - Ops[4] = Builder.CreateBitCast(Ops[4], Ty); - Ops[5] = Builder.CreateBitCast(Ops[5], Ty); - Ops.push_back(Align); - Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); - } - case ARM::BI__builtin_neon_vld2_dup_v: - case ARM::BI__builtin_neon_vld3_dup_v: - case ARM::BI__builtin_neon_vld4_dup_v: { + case NEON::BI__builtin_neon_vld2_dup_v: + case NEON::BI__builtin_neon_vld3_dup_v: + case NEON::BI__builtin_neon_vld4_dup_v: { // Handle 64-bit elements as a special-case. There is no "dup" needed. if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) { switch (BuiltinID) { - case ARM::BI__builtin_neon_vld2_dup_v: + case NEON::BI__builtin_neon_vld2_dup_v: Int = Intrinsic::arm_neon_vld2; break; - case ARM::BI__builtin_neon_vld3_dup_v: + case NEON::BI__builtin_neon_vld3_dup_v: Int = Intrinsic::arm_neon_vld3; break; - case ARM::BI__builtin_neon_vld4_dup_v: + case NEON::BI__builtin_neon_vld4_dup_v: Int = Intrinsic::arm_neon_vld4; break; default: llvm_unreachable("unknown vld_dup intrinsic?"); @@ -4474,13 +3457,13 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return Builder.CreateStore(Ops[1], Ops[0]); } switch (BuiltinID) { - case ARM::BI__builtin_neon_vld2_dup_v: + case NEON::BI__builtin_neon_vld2_dup_v: Int = Intrinsic::arm_neon_vld2lane; break; - case ARM::BI__builtin_neon_vld3_dup_v: + case NEON::BI__builtin_neon_vld3_dup_v: Int = Intrinsic::arm_neon_vld3lane; break; - case ARM::BI__builtin_neon_vld4_dup_v: + case NEON::BI__builtin_neon_vld4_dup_v: Int = Intrinsic::arm_neon_vld4lane; break; default: llvm_unreachable("unknown vld_dup intrinsic?"); @@ -4509,251 +3492,58 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return Builder.CreateStore(Ops[1], Ops[0]); } - case ARM::BI__builtin_neon_vmax_v: - case ARM::BI__builtin_neon_vmaxq_v: - Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); - case ARM::BI__builtin_neon_vmin_v: - case ARM::BI__builtin_neon_vminq_v: - Int = usgn ? 
Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); - case ARM::BI__builtin_neon_vmovl_v: { - llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy); - Ops[0] = Builder.CreateBitCast(Ops[0], DTy); - if (usgn) - return Builder.CreateZExt(Ops[0], Ty, "vmovl"); - return Builder.CreateSExt(Ops[0], Ty, "vmovl"); - } - case ARM::BI__builtin_neon_vmovn_v: { - llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy); - Ops[0] = Builder.CreateBitCast(Ops[0], QTy); - return Builder.CreateTrunc(Ops[0], Ty, "vmovn"); - } - case ARM::BI__builtin_neon_vmul_v: - case ARM::BI__builtin_neon_vmulq_v: - assert(Type.isPoly() && "vmul builtin only supported for polynomial types"); - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmulp, Ty), - Ops, "vmul"); - case ARM::BI__builtin_neon_vmull_v: - // FIXME: the integer vmull operations could be emitted in terms of pure - // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of - // hoisting the exts outside loops. Until global ISel comes along that can - // see through such movement this leads to bad CodeGen. So we need an - // intrinsic for now. - Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls; - Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); - case ARM::BI__builtin_neon_vfma_v: - case ARM::BI__builtin_neon_vfmaq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Ops[2] = Builder.CreateBitCast(Ops[2], Ty); - - // NEON intrinsic puts accumulator first, unlike the LLVM fma. - return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); - } - case ARM::BI__builtin_neon_vpadal_v: - case ARM::BI__builtin_neon_vpadalq_v: { - Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals; - // The source operand type has twice as many elements of half the size. - unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); - llvm::Type *EltTy = - llvm::IntegerType::get(getLLVMContext(), EltBits / 2); - llvm::Type *NarrowTy = - llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); - llvm::Type *Tys[2] = { Ty, NarrowTy }; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpadal"); - } - case ARM::BI__builtin_neon_vpadd_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, Ty), - Ops, "vpadd"); - case ARM::BI__builtin_neon_vpaddl_v: - case ARM::BI__builtin_neon_vpaddlq_v: { - Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls; - // The source operand type has twice as many elements of half the size. - unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); - llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2); - llvm::Type *NarrowTy = - llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); - llvm::Type *Tys[2] = { Ty, NarrowTy }; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl"); - } - case ARM::BI__builtin_neon_vpmax_v: - Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); - case ARM::BI__builtin_neon_vpmin_v: - Int = usgn ? 
Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); - case ARM::BI__builtin_neon_vqabs_v: - case ARM::BI__builtin_neon_vqabsq_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, Ty), - Ops, "vqabs"); - case ARM::BI__builtin_neon_vqadd_v: - case ARM::BI__builtin_neon_vqaddq_v: - Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqadd"); - case ARM::BI__builtin_neon_vqdmlal_v: { - SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end()); - Value *Mul = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty), - MulOps, "vqdmlal"); - - SmallVector<Value *, 2> AddOps; - AddOps.push_back(Ops[0]); - AddOps.push_back(Mul); - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqadds, Ty), - AddOps, "vqdmlal"); - } - case ARM::BI__builtin_neon_vqdmlsl_v: { - SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end()); - Value *Mul = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty), - MulOps, "vqdmlsl"); - - SmallVector<Value *, 2> SubOps; - SubOps.push_back(Ops[0]); - SubOps.push_back(Mul); - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqsubs, Ty), - SubOps, "vqdmlsl"); - } - case ARM::BI__builtin_neon_vqdmulh_v: - case ARM::BI__builtin_neon_vqdmulhq_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, Ty), - Ops, "vqdmulh"); - case ARM::BI__builtin_neon_vqdmull_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty), - Ops, "vqdmull"); - case ARM::BI__builtin_neon_vqmovn_v: - Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqmovn"); - case ARM::BI__builtin_neon_vqmovun_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, Ty), - Ops, "vqdmull"); - case ARM::BI__builtin_neon_vqneg_v: - case ARM::BI__builtin_neon_vqnegq_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, Ty), - Ops, "vqneg"); - case ARM::BI__builtin_neon_vqrdmulh_v: - case ARM::BI__builtin_neon_vqrdmulhq_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, Ty), - Ops, "vqrdmulh"); - case ARM::BI__builtin_neon_vqrshl_v: - case ARM::BI__builtin_neon_vqrshlq_v: - Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshl"); - case ARM::BI__builtin_neon_vqrshrn_n_v: + case NEON::BI__builtin_neon_vqrshrn_n_v: Int = usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", 1, true); - case ARM::BI__builtin_neon_vqrshrun_n_v: + case NEON::BI__builtin_neon_vqrshrun_n_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty), Ops, "vqrshrun_n", 1, true); - case ARM::BI__builtin_neon_vqshl_v: - case ARM::BI__builtin_neon_vqshlq_v: - Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl"); - case ARM::BI__builtin_neon_vqshl_n_v: - case ARM::BI__builtin_neon_vqshlq_n_v: - Int = usgn ? 
Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n", - 1, false); - case ARM::BI__builtin_neon_vqshlu_n_v: - case ARM::BI__builtin_neon_vqshluq_n_v: + case NEON::BI__builtin_neon_vqshlu_n_v: + case NEON::BI__builtin_neon_vqshluq_n_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, Ty), Ops, "vqshlu", 1, false); - case ARM::BI__builtin_neon_vqshrn_n_v: + case NEON::BI__builtin_neon_vqshrn_n_v: Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", 1, true); - case ARM::BI__builtin_neon_vqshrun_n_v: + case NEON::BI__builtin_neon_vqshrun_n_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty), Ops, "vqshrun_n", 1, true); - case ARM::BI__builtin_neon_vqsub_v: - case ARM::BI__builtin_neon_vqsubq_v: - Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqsub"); - case ARM::BI__builtin_neon_vraddhn_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, Ty), - Ops, "vraddhn"); - case ARM::BI__builtin_neon_vrecpe_v: - case ARM::BI__builtin_neon_vrecpeq_v: + case NEON::BI__builtin_neon_vrecpe_v: + case NEON::BI__builtin_neon_vrecpeq_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty), Ops, "vrecpe"); - case ARM::BI__builtin_neon_vrecps_v: - case ARM::BI__builtin_neon_vrecpsq_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, Ty), - Ops, "vrecps"); - case ARM::BI__builtin_neon_vrhadd_v: - case ARM::BI__builtin_neon_vrhaddq_v: - Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrhadd"); - case ARM::BI__builtin_neon_vrshl_v: - case ARM::BI__builtin_neon_vrshlq_v: - Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshl"); - case ARM::BI__builtin_neon_vrshrn_n_v: + case NEON::BI__builtin_neon_vrshrn_n_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty), Ops, "vrshrn_n", 1, true); - case ARM::BI__builtin_neon_vrshr_n_v: - case ARM::BI__builtin_neon_vrshrq_n_v: + case NEON::BI__builtin_neon_vrshr_n_v: + case NEON::BI__builtin_neon_vrshrq_n_v: Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true); - case ARM::BI__builtin_neon_vrsqrte_v: - case ARM::BI__builtin_neon_vrsqrteq_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, Ty), - Ops, "vrsqrte"); - case ARM::BI__builtin_neon_vrsqrts_v: - case ARM::BI__builtin_neon_vrsqrtsq_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, Ty), - Ops, "vrsqrts"); - case ARM::BI__builtin_neon_vrsra_n_v: - case ARM::BI__builtin_neon_vrsraq_n_v: + case NEON::BI__builtin_neon_vrsra_n_v: + case NEON::BI__builtin_neon_vrsraq_n_v: Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true); Int = usgn ? 
Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]); return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); - case ARM::BI__builtin_neon_vrsubhn_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, Ty), - Ops, "vrsubhn"); - case ARM::BI__builtin_neon_vshl_v: - case ARM::BI__builtin_neon_vshlq_v: - Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshl"); - case ARM::BI__builtin_neon_vshll_n_v: - Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshll", 1); - case ARM::BI__builtin_neon_vshl_n_v: - case ARM::BI__builtin_neon_vshlq_n_v: - Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false); - return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], - "vshl_n"); - case ARM::BI__builtin_neon_vshrn_n_v: - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, Ty), - Ops, "vshrn_n", 1, true); - case ARM::BI__builtin_neon_vshr_n_v: - case ARM::BI__builtin_neon_vshrq_n_v: - return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, usgn, "vshr_n"); - case ARM::BI__builtin_neon_vsri_n_v: - case ARM::BI__builtin_neon_vsriq_n_v: + case NEON::BI__builtin_neon_vsri_n_v: + case NEON::BI__builtin_neon_vsriq_n_v: rightShift = true; - case ARM::BI__builtin_neon_vsli_n_v: - case ARM::BI__builtin_neon_vsliq_n_v: + case NEON::BI__builtin_neon_vsli_n_v: + case NEON::BI__builtin_neon_vsliq_n_v: Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty), Ops, "vsli_n"); - case ARM::BI__builtin_neon_vsra_n_v: - case ARM::BI__builtin_neon_vsraq_n_v: + case NEON::BI__builtin_neon_vsra_n_v: + case NEON::BI__builtin_neon_vsraq_n_v: Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); return Builder.CreateAdd(Ops[0], Ops[1]); - case ARM::BI__builtin_neon_vst1_v: - case ARM::BI__builtin_neon_vst1q_v: - Ops.push_back(Align); - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, Ty), - Ops, ""); - case ARM::BI__builtin_neon_vst1q_lane_v: + case NEON::BI__builtin_neon_vst1q_lane_v: // Handle 64-bit integer elements as a special case. Use a shuffle to get // a one-element vector and avoid poor code for i64 in the backend. 
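A usage sketch of the builtin that comment refers to, assuming an ARM target with <arm_neon.h>; the shuffle described above keeps the selected i64 lane inside a one-element vector instead of materializing a scalar i64:

#include <arm_neon.h>

void store_high_lane(uint64_t *p, uint64x2_t v) {
  // Lowered via a shufflevector to <1 x i64> followed by a vector store,
  // rather than an extractelement of i64, per the comment above.
  vst1q_lane_u64(p, v, 1);
}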
if (VTy->getElementType()->isIntegerTy(64)) { @@ -4765,7 +3555,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Ops[1]->getType()), Ops); } // fall through - case ARM::BI__builtin_neon_vst1_lane_v: { + case NEON::BI__builtin_neon_vst1_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); @@ -4774,99 +3564,1984 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, St->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); return St; } - case ARM::BI__builtin_neon_vst2_v: - case ARM::BI__builtin_neon_vst2q_v: - Ops.push_back(Align); - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, Ty), - Ops, ""); - case ARM::BI__builtin_neon_vst2_lane_v: - case ARM::BI__builtin_neon_vst2q_lane_v: - Ops.push_back(Align); - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, Ty), - Ops, ""); - case ARM::BI__builtin_neon_vst3_v: - case ARM::BI__builtin_neon_vst3q_v: - Ops.push_back(Align); - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, Ty), - Ops, ""); - case ARM::BI__builtin_neon_vst3_lane_v: - case ARM::BI__builtin_neon_vst3q_lane_v: - Ops.push_back(Align); - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, Ty), - Ops, ""); - case ARM::BI__builtin_neon_vst4_v: - case ARM::BI__builtin_neon_vst4q_v: - Ops.push_back(Align); - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, Ty), - Ops, ""); - case ARM::BI__builtin_neon_vst4_lane_v: - case ARM::BI__builtin_neon_vst4q_lane_v: - Ops.push_back(Align); - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, Ty), - Ops, ""); - case ARM::BI__builtin_neon_vsubhn_v: { - llvm::VectorType *SrcTy = - llvm::VectorType::getExtendedElementVectorType(VTy); - - // %sum = add <4 x i32> %lhs, %rhs - Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); - Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); - Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn"); - - // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> - Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(), - SrcTy->getScalarSizeInBits() / 2); - ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt); - Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn"); - - // %res = trunc <4 x i32> %high to <4 x i16> - return Builder.CreateTrunc(Ops[0], VTy, "vsubhn"); - } - case ARM::BI__builtin_neon_vtbl1_v: + case NEON::BI__builtin_neon_vtbl1_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), Ops, "vtbl1"); - case ARM::BI__builtin_neon_vtbl2_v: + case NEON::BI__builtin_neon_vtbl2_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2), Ops, "vtbl2"); - case ARM::BI__builtin_neon_vtbl3_v: + case NEON::BI__builtin_neon_vtbl3_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3), Ops, "vtbl3"); - case ARM::BI__builtin_neon_vtbl4_v: + case NEON::BI__builtin_neon_vtbl4_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4), Ops, "vtbl4"); - case ARM::BI__builtin_neon_vtbx1_v: + case NEON::BI__builtin_neon_vtbx1_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1), Ops, "vtbx1"); - case ARM::BI__builtin_neon_vtbx2_v: + case NEON::BI__builtin_neon_vtbx2_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2), Ops, "vtbx2"); - case ARM::BI__builtin_neon_vtbx3_v: + case NEON::BI__builtin_neon_vtbx3_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3), Ops, "vtbx3"); - case 
ARM::BI__builtin_neon_vtbx4_v: + case NEON::BI__builtin_neon_vtbx4_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4), Ops, "vtbx4"); - case ARM::BI__builtin_neon_vtst_v: - case ARM::BI__builtin_neon_vtstq_v: { + } +} + +static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, + const CallExpr *E, + SmallVectorImpl<Value *> &Ops) { + unsigned int Int = 0; + const char *s = nullptr; + + switch (BuiltinID) { + default: + return nullptr; + case NEON::BI__builtin_neon_vtbl1_v: + case NEON::BI__builtin_neon_vqtbl1_v: + case NEON::BI__builtin_neon_vqtbl1q_v: + case NEON::BI__builtin_neon_vtbl2_v: + case NEON::BI__builtin_neon_vqtbl2_v: + case NEON::BI__builtin_neon_vqtbl2q_v: + case NEON::BI__builtin_neon_vtbl3_v: + case NEON::BI__builtin_neon_vqtbl3_v: + case NEON::BI__builtin_neon_vqtbl3q_v: + case NEON::BI__builtin_neon_vtbl4_v: + case NEON::BI__builtin_neon_vqtbl4_v: + case NEON::BI__builtin_neon_vqtbl4q_v: + break; + case NEON::BI__builtin_neon_vtbx1_v: + case NEON::BI__builtin_neon_vqtbx1_v: + case NEON::BI__builtin_neon_vqtbx1q_v: + case NEON::BI__builtin_neon_vtbx2_v: + case NEON::BI__builtin_neon_vqtbx2_v: + case NEON::BI__builtin_neon_vqtbx2q_v: + case NEON::BI__builtin_neon_vtbx3_v: + case NEON::BI__builtin_neon_vqtbx3_v: + case NEON::BI__builtin_neon_vqtbx3q_v: + case NEON::BI__builtin_neon_vtbx4_v: + case NEON::BI__builtin_neon_vqtbx4_v: + case NEON::BI__builtin_neon_vqtbx4q_v: + break; + } + + assert(E->getNumArgs() >= 3); + + // Get the last argument, which specifies the vector type. + llvm::APSInt Result; + const Expr *Arg = E->getArg(E->getNumArgs() - 1); + if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) + return nullptr; + + // Determine the type of this overloaded NEON intrinsic. + NeonTypeFlags Type(Result.getZExtValue()); + llvm::VectorType *VTy = GetNeonType(&CGF, Type); + llvm::Type *Ty = VTy; + if (!Ty) + return nullptr; + + unsigned nElts = VTy->getNumElements(); + + CodeGen::CGBuilderTy &Builder = CGF.Builder; + + // AArch64 scalar builtins are not overloaded, they do not have an extra + // argument that specifies the vector type, need to handle each case. 
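A scalar model of the NEON table operations handled below (semantics only, not the codegen): TBL yields zero for out-of-range indices, while TBX keeps the corresponding byte of the destination, which mirrors the compare-and-select that the vtbx1/vtbx3 cases build by hand further on.

#include <array>
#include <cstdint>

using Bytes8 = std::array<uint8_t, 8>;

Bytes8 tbl1(const Bytes8 &table, const Bytes8 &idx) {
  Bytes8 r{};
  for (int i = 0; i != 8; ++i)
    r[i] = idx[i] < 8 ? table[idx[i]] : 0;   // out of range -> 0
  return r;
}

Bytes8 tbx1(const Bytes8 &dst, const Bytes8 &table, const Bytes8 &idx) {
  Bytes8 r = dst;
  for (int i = 0; i != 8; ++i)
    if (idx[i] < 8) r[i] = table[idx[i]];    // out of range -> keep dst byte
  return r;
}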
+ SmallVector<Value *, 2> TblOps; + switch (BuiltinID) { + case NEON::BI__builtin_neon_vtbl1_v: { + TblOps.push_back(Ops[0]); + return packTBLDVectorList(CGF, TblOps, nullptr, Ops[1], Ty, + Intrinsic::aarch64_neon_tbl1, "vtbl1"); + } + case NEON::BI__builtin_neon_vtbl2_v: { + TblOps.push_back(Ops[0]); + TblOps.push_back(Ops[1]); + return packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty, + Intrinsic::aarch64_neon_tbl1, "vtbl1"); + } + case NEON::BI__builtin_neon_vtbl3_v: { + TblOps.push_back(Ops[0]); + TblOps.push_back(Ops[1]); + TblOps.push_back(Ops[2]); + return packTBLDVectorList(CGF, TblOps, nullptr, Ops[3], Ty, + Intrinsic::aarch64_neon_tbl2, "vtbl2"); + } + case NEON::BI__builtin_neon_vtbl4_v: { + TblOps.push_back(Ops[0]); + TblOps.push_back(Ops[1]); + TblOps.push_back(Ops[2]); + TblOps.push_back(Ops[3]); + return packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty, + Intrinsic::aarch64_neon_tbl2, "vtbl2"); + } + case NEON::BI__builtin_neon_vtbx1_v: { + TblOps.push_back(Ops[1]); + Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty, + Intrinsic::aarch64_neon_tbl1, "vtbl1"); + + llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8); + Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight); + Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); + CmpRes = Builder.CreateSExt(CmpRes, Ty); + + Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); + Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); + return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); + } + case NEON::BI__builtin_neon_vtbx2_v: { + TblOps.push_back(Ops[1]); + TblOps.push_back(Ops[2]); + return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty, + Intrinsic::aarch64_neon_tbx1, "vtbx1"); + } + case NEON::BI__builtin_neon_vtbx3_v: { + TblOps.push_back(Ops[1]); + TblOps.push_back(Ops[2]); + TblOps.push_back(Ops[3]); + Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty, + Intrinsic::aarch64_neon_tbl2, "vtbl2"); + + llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24); + Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour); + Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], + TwentyFourV); + CmpRes = Builder.CreateSExt(CmpRes, Ty); + + Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); + Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); + return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); + } + case NEON::BI__builtin_neon_vtbx4_v: { + TblOps.push_back(Ops[1]); + TblOps.push_back(Ops[2]); + TblOps.push_back(Ops[3]); + TblOps.push_back(Ops[4]); + return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty, + Intrinsic::aarch64_neon_tbx2, "vtbx2"); + } + case NEON::BI__builtin_neon_vqtbl1_v: + case NEON::BI__builtin_neon_vqtbl1q_v: + Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break; + case NEON::BI__builtin_neon_vqtbl2_v: + case NEON::BI__builtin_neon_vqtbl2q_v: { + Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break; + case NEON::BI__builtin_neon_vqtbl3_v: + case NEON::BI__builtin_neon_vqtbl3q_v: + Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break; + case NEON::BI__builtin_neon_vqtbl4_v: + case NEON::BI__builtin_neon_vqtbl4q_v: + Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break; + case NEON::BI__builtin_neon_vqtbx1_v: + case NEON::BI__builtin_neon_vqtbx1q_v: + Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break; + case NEON::BI__builtin_neon_vqtbx2_v: + case NEON::BI__builtin_neon_vqtbx2q_v: + Int = 
Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break; + case NEON::BI__builtin_neon_vqtbx3_v: + case NEON::BI__builtin_neon_vqtbx3q_v: + Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break; + case NEON::BI__builtin_neon_vqtbx4_v: + case NEON::BI__builtin_neon_vqtbx4q_v: + Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break; + } + } + + if (!Int) + return nullptr; + + Function *F = CGF.CGM.getIntrinsic(Int, Ty); + return CGF.EmitNeonCall(F, Ops, s); +} + +Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { + llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); + Op = Builder.CreateBitCast(Op, Int16Ty); + Value *V = UndefValue::get(VTy); + llvm::Constant *CI = ConstantInt::get(SizeTy, 0); + Op = Builder.CreateInsertElement(V, Op, CI); + return Op; +} + +Value *CodeGenFunction::vectorWrapScalar8(Value *Op) { + llvm::Type *VTy = llvm::VectorType::get(Int8Ty, 8); + Op = Builder.CreateBitCast(Op, Int8Ty); + Value *V = UndefValue::get(VTy); + llvm::Constant *CI = ConstantInt::get(SizeTy, 0); + Op = Builder.CreateInsertElement(V, Op, CI); + return Op; +} + +Value *CodeGenFunction:: +emitVectorWrappedScalar8Intrinsic(unsigned Int, SmallVectorImpl<Value*> &Ops, + const char *Name) { + // i8 is not a legal types for AArch64, so we can't just use + // a normal overloaded intrinsic call for these scalar types. Instead + // we'll build 64-bit vectors w/ lane zero being our input values and + // perform the operation on that. The back end can pattern match directly + // to the scalar instruction. + Ops[0] = vectorWrapScalar8(Ops[0]); + Ops[1] = vectorWrapScalar8(Ops[1]); + llvm::Type *VTy = llvm::VectorType::get(Int8Ty, 8); + Value *V = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, Name); + Constant *CI = ConstantInt::get(SizeTy, 0); + return Builder.CreateExtractElement(V, CI, "lane0"); +} + +Value *CodeGenFunction:: +emitVectorWrappedScalar16Intrinsic(unsigned Int, SmallVectorImpl<Value*> &Ops, + const char *Name) { + // i16 is not a legal types for AArch64, so we can't just use + // a normal overloaded intrinsic call for these scalar types. Instead + // we'll build 64-bit vectors w/ lane zero being our input values and + // perform the operation on that. The back end can pattern match directly + // to the scalar instruction. 
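For instance, a 16-bit scalar saturating add is a plausible user of this path (assuming an AArch64 target with <arm_neon.h>): both operands are placed in lane 0 of a v4i16, the vector operation runs, and lane 0 of the result is read back, as the helpers above describe.

#include <arm_neon.h>

int16_t sat_add16(int16_t a, int16_t b) {
  // i16 is not a legal IR type on AArch64; the wrapped-vector trick above
  // lets the backend pattern-match this down to a scalar SQADD.
  return vqaddh_s16(a, b);
}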
+ Ops[0] = vectorWrapScalar16(Ops[0]); + Ops[1] = vectorWrapScalar16(Ops[1]); + llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); + Value *V = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, Name); + Constant *CI = ConstantInt::get(SizeTy, 0); + return Builder.CreateExtractElement(V, CI, "lane0"); +} + +Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + unsigned HintID = static_cast<unsigned>(-1); + switch (BuiltinID) { + default: break; + case AArch64::BI__builtin_arm_nop: + HintID = 0; + break; + case AArch64::BI__builtin_arm_yield: + HintID = 1; + break; + case AArch64::BI__builtin_arm_wfe: + HintID = 2; + break; + case AArch64::BI__builtin_arm_wfi: + HintID = 3; + break; + case AArch64::BI__builtin_arm_sev: + HintID = 4; + break; + case AArch64::BI__builtin_arm_sevl: + HintID = 5; + break; + } + + if (HintID != static_cast<unsigned>(-1)) { + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint); + return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); + } + + if (BuiltinID == AArch64::BI__builtin_arm_rbit) { + assert((getContext().getTypeSize(E->getType()) == 32) && + "rbit of unusual size!"); + llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit"); + } + if (BuiltinID == AArch64::BI__builtin_arm_rbit64) { + assert((getContext().getTypeSize(E->getType()) == 64) && + "rbit of unusual size!"); + llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit"); + } + + if (BuiltinID == AArch64::BI__clear_cache) { + assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); + const FunctionDecl *FD = E->getDirectCallee(); + SmallVector<Value*, 2> Ops; + for (unsigned i = 0; i < 2; i++) + Ops.push_back(EmitScalarExpr(E->getArg(i))); + llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); + llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); + StringRef Name = FD->getName(); + return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); + } + + if ((BuiltinID == AArch64::BI__builtin_arm_ldrex || + BuiltinID == AArch64::BI__builtin_arm_ldaex) && + getContext().getTypeSize(E->getType()) == 128) { + Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex + ? 
Intrinsic::aarch64_ldaxp + : Intrinsic::aarch64_ldxp); + + Value *LdPtr = EmitScalarExpr(E->getArg(0)); + Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), + "ldxp"); + + Value *Val0 = Builder.CreateExtractValue(Val, 1); + Value *Val1 = Builder.CreateExtractValue(Val, 0); + llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); + Val0 = Builder.CreateZExt(Val0, Int128Ty); + Val1 = Builder.CreateZExt(Val1, Int128Ty); + + Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64); + Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); + Val = Builder.CreateOr(Val, Val1); + return Builder.CreateBitCast(Val, ConvertType(E->getType())); + } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex || + BuiltinID == AArch64::BI__builtin_arm_ldaex) { + Value *LoadAddr = EmitScalarExpr(E->getArg(0)); + + QualType Ty = E->getType(); + llvm::Type *RealResTy = ConvertType(Ty); + llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(), + getContext().getTypeSize(Ty)); + LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo()); + + Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex + ? Intrinsic::aarch64_ldaxr + : Intrinsic::aarch64_ldxr, + LoadAddr->getType()); + Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); + + if (RealResTy->isPointerTy()) + return Builder.CreateIntToPtr(Val, RealResTy); + + Val = Builder.CreateTruncOrBitCast(Val, IntResTy); + return Builder.CreateBitCast(Val, RealResTy); + } + + if ((BuiltinID == AArch64::BI__builtin_arm_strex || + BuiltinID == AArch64::BI__builtin_arm_stlex) && + getContext().getTypeSize(E->getArg(0)->getType()) == 128) { + Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex + ? Intrinsic::aarch64_stlxp + : Intrinsic::aarch64_stxp); + llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, NULL); + + Value *One = llvm::ConstantInt::get(Int32Ty, 1); + Value *Tmp = Builder.CreateAlloca(ConvertType(E->getArg(0)->getType()), + One); + Value *Val = EmitScalarExpr(E->getArg(0)); + Builder.CreateStore(Val, Tmp); + + Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); + Val = Builder.CreateLoad(LdPtr); + + Value *Arg0 = Builder.CreateExtractValue(Val, 0); + Value *Arg1 = Builder.CreateExtractValue(Val, 1); + Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), + Int8PtrTy); + return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "stxp"); + } else if (BuiltinID == AArch64::BI__builtin_arm_strex || + BuiltinID == AArch64::BI__builtin_arm_stlex) { + Value *StoreVal = EmitScalarExpr(E->getArg(0)); + Value *StoreAddr = EmitScalarExpr(E->getArg(1)); + + QualType Ty = E->getArg(0)->getType(); + llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), + getContext().getTypeSize(Ty)); + StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); + + if (StoreVal->getType()->isPointerTy()) + StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); + else { + StoreVal = Builder.CreateBitCast(StoreVal, StoreTy); + StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); + } + + Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex + ? 
Intrinsic::aarch64_stlxr + : Intrinsic::aarch64_stxr, + StoreAddr->getType()); + return Builder.CreateCall2(F, StoreVal, StoreAddr, "stxr"); + } + + if (BuiltinID == AArch64::BI__builtin_arm_clrex) { + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); + return Builder.CreateCall(F); + } + + // CRC32 + Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; + switch (BuiltinID) { + case AArch64::BI__builtin_arm_crc32b: + CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; + case AArch64::BI__builtin_arm_crc32cb: + CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; + case AArch64::BI__builtin_arm_crc32h: + CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; + case AArch64::BI__builtin_arm_crc32ch: + CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; + case AArch64::BI__builtin_arm_crc32w: + CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; + case AArch64::BI__builtin_arm_crc32cw: + CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; + case AArch64::BI__builtin_arm_crc32d: + CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; + case AArch64::BI__builtin_arm_crc32cd: + CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; + } + + if (CRCIntrinsicID != Intrinsic::not_intrinsic) { + Value *Arg0 = EmitScalarExpr(E->getArg(0)); + Value *Arg1 = EmitScalarExpr(E->getArg(1)); + Function *F = CGM.getIntrinsic(CRCIntrinsicID); + + llvm::Type *DataTy = F->getFunctionType()->getParamType(1); + Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); + + return Builder.CreateCall2(F, Arg0, Arg1); + } + + llvm::SmallVector<Value*, 4> Ops; + for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) + Ops.push_back(EmitScalarExpr(E->getArg(i))); + + ArrayRef<NeonIntrinsicInfo> SISDMap(AArch64SISDIntrinsicMap); + const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( + SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); + + if (Builtin) { + Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1))); + Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E); + assert(Result && "SISD intrinsic should have been handled"); + return Result; + } + + llvm::APSInt Result; + const Expr *Arg = E->getArg(E->getNumArgs()-1); + NeonTypeFlags Type(0); + if (Arg->isIntegerConstantExpr(Result, getContext())) + // Determine the type of this overloaded NEON intrinsic. + Type = NeonTypeFlags(Result.getZExtValue()); + + bool usgn = Type.isUnsigned(); + bool quad = Type.isQuad(); + + // Handle non-overloaded intrinsics first. + switch (BuiltinID) { + default: break; + case NEON::BI__builtin_neon_vldrq_p128: { + llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); + Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy); + return Builder.CreateLoad(Ptr); + } + case NEON::BI__builtin_neon_vstrq_p128: { + llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); + Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy); + return Builder.CreateStore(EmitScalarExpr(E->getArg(1)), Ptr); + } + case NEON::BI__builtin_neon_vcvts_u32_f32: + case NEON::BI__builtin_neon_vcvtd_u64_f64: + usgn = true; + // FALL THROUGH + case NEON::BI__builtin_neon_vcvts_s32_f32: + case NEON::BI__builtin_neon_vcvtd_s64_f64: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; + llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; + llvm::Type *FTy = Is64 ? 
DoubleTy : FloatTy; + Ops[0] = Builder.CreateBitCast(Ops[0], FTy); + if (usgn) + return Builder.CreateFPToUI(Ops[0], InTy); + return Builder.CreateFPToSI(Ops[0], InTy); + } + case NEON::BI__builtin_neon_vcvts_f32_u32: + case NEON::BI__builtin_neon_vcvtd_f64_u64: + usgn = true; + // FALL THROUGH + case NEON::BI__builtin_neon_vcvts_f32_s32: + case NEON::BI__builtin_neon_vcvtd_f64_s64: { + Ops.push_back(EmitScalarExpr(E->getArg(0))); + bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; + llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; + llvm::Type *FTy = Is64 ? DoubleTy : FloatTy; + Ops[0] = Builder.CreateBitCast(Ops[0], InTy); + if (usgn) + return Builder.CreateUIToFP(Ops[0], FTy); + return Builder.CreateSIToFP(Ops[0], FTy); + } + case NEON::BI__builtin_neon_vpaddd_s64: { + llvm::Type *Ty = + llvm::VectorType::get(llvm::Type::getInt64Ty(getLLVMContext()), 2); + Value *Vec = EmitScalarExpr(E->getArg(0)); + // The vector is v2f64, so make sure it's bitcast to that. + Vec = Builder.CreateBitCast(Vec, Ty, "v2i64"); + llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); + llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); + Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); + Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); + // Pairwise addition of a v2f64 into a scalar f64. + return Builder.CreateAdd(Op0, Op1, "vpaddd"); + } + case NEON::BI__builtin_neon_vpaddd_f64: { + llvm::Type *Ty = + llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2); + Value *Vec = EmitScalarExpr(E->getArg(0)); + // The vector is v2f64, so make sure it's bitcast to that. + Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); + llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); + llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); + Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); + Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); + // Pairwise addition of a v2f64 into a scalar f64. + return Builder.CreateFAdd(Op0, Op1, "vpaddd"); + } + case NEON::BI__builtin_neon_vpadds_f32: { + llvm::Type *Ty = + llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2); + Value *Vec = EmitScalarExpr(E->getArg(0)); + // The vector is v2f32, so make sure it's bitcast to that. + Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); + llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); + llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); + Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); + Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); + // Pairwise addition of a v2f32 into a scalar f32. 
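At the source level the pairwise reductions above simply add the two lanes of their input vector; a usage sketch assuming an AArch64 target with <arm_neon.h>:

#include <arm_neon.h>

double sum_pair_f64(float64x2_t v) { return vpaddd_f64(v); }  // lane0 + lane1
float  sum_pair_f32(float32x2_t v) { return vpadds_f32(v); }  // lane0 + lane1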
+ return Builder.CreateFAdd(Op0, Op1, "vpaddd"); + } + case NEON::BI__builtin_neon_vceqzd_s64: + case NEON::BI__builtin_neon_vceqzd_f64: + case NEON::BI__builtin_neon_vceqzs_f32: + Ops.push_back(EmitScalarExpr(E->getArg(0))); + return EmitAArch64CompareBuiltinExpr( + Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OEQ, + ICmpInst::ICMP_EQ, "vceqz"); + case NEON::BI__builtin_neon_vcgezd_s64: + case NEON::BI__builtin_neon_vcgezd_f64: + case NEON::BI__builtin_neon_vcgezs_f32: + Ops.push_back(EmitScalarExpr(E->getArg(0))); + return EmitAArch64CompareBuiltinExpr( + Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OGE, + ICmpInst::ICMP_SGE, "vcgez"); + case NEON::BI__builtin_neon_vclezd_s64: + case NEON::BI__builtin_neon_vclezd_f64: + case NEON::BI__builtin_neon_vclezs_f32: + Ops.push_back(EmitScalarExpr(E->getArg(0))); + return EmitAArch64CompareBuiltinExpr( + Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OLE, + ICmpInst::ICMP_SLE, "vclez"); + case NEON::BI__builtin_neon_vcgtzd_s64: + case NEON::BI__builtin_neon_vcgtzd_f64: + case NEON::BI__builtin_neon_vcgtzs_f32: + Ops.push_back(EmitScalarExpr(E->getArg(0))); + return EmitAArch64CompareBuiltinExpr( + Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OGT, + ICmpInst::ICMP_SGT, "vcgtz"); + case NEON::BI__builtin_neon_vcltzd_s64: + case NEON::BI__builtin_neon_vcltzd_f64: + case NEON::BI__builtin_neon_vcltzs_f32: + Ops.push_back(EmitScalarExpr(E->getArg(0))); + return EmitAArch64CompareBuiltinExpr( + Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OLT, + ICmpInst::ICMP_SLT, "vcltz"); + + case NEON::BI__builtin_neon_vceqzd_u64: { + llvm::Type *Ty = llvm::Type::getInt64Ty(getLLVMContext()); + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + Ops[0] = Builder.CreateICmp(llvm::ICmpInst::ICMP_EQ, Ops[0], + llvm::Constant::getNullValue(Ty)); + return Builder.CreateSExt(Ops[0], Ty, "vceqzd"); + } + case NEON::BI__builtin_neon_vceqd_f64: + case NEON::BI__builtin_neon_vcled_f64: + case NEON::BI__builtin_neon_vcltd_f64: + case NEON::BI__builtin_neon_vcged_f64: + case NEON::BI__builtin_neon_vcgtd_f64: { + llvm::CmpInst::Predicate P; + switch (BuiltinID) { + default: llvm_unreachable("missing builtin ID in switch!"); + case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break; + case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break; + case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break; + case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break; + case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break; + } + Ops.push_back(EmitScalarExpr(E->getArg(1))); + Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); + Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); + Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); + return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); + } + case NEON::BI__builtin_neon_vceqs_f32: + case NEON::BI__builtin_neon_vcles_f32: + case NEON::BI__builtin_neon_vclts_f32: + case NEON::BI__builtin_neon_vcges_f32: + case NEON::BI__builtin_neon_vcgts_f32: { + llvm::CmpInst::Predicate P; + switch (BuiltinID) { + default: llvm_unreachable("missing builtin ID in switch!"); + case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break; + case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break; + case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break; + case NEON::BI__builtin_neon_vcges_f32: P = 
llvm::FCmpInst::FCMP_OGE; break; + case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break; + } + Ops.push_back(EmitScalarExpr(E->getArg(1))); + Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); + Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); + Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); + return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); + } + case NEON::BI__builtin_neon_vceqd_s64: + case NEON::BI__builtin_neon_vceqd_u64: + case NEON::BI__builtin_neon_vcgtd_s64: + case NEON::BI__builtin_neon_vcgtd_u64: + case NEON::BI__builtin_neon_vcltd_s64: + case NEON::BI__builtin_neon_vcltd_u64: + case NEON::BI__builtin_neon_vcged_u64: + case NEON::BI__builtin_neon_vcged_s64: + case NEON::BI__builtin_neon_vcled_u64: + case NEON::BI__builtin_neon_vcled_s64: { + llvm::CmpInst::Predicate P; + switch (BuiltinID) { + default: llvm_unreachable("missing builtin ID in switch!"); + case NEON::BI__builtin_neon_vceqd_s64: + case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break; + case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break; + case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break; + case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break; + case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break; + case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break; + case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break; + case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break; + case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break; + } + Ops.push_back(EmitScalarExpr(E->getArg(1))); + Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); + Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); + Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]); + return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); + } + case NEON::BI__builtin_neon_vtstd_s64: + case NEON::BI__builtin_neon_vtstd_u64: { + llvm::Type *Ty = llvm::Type::getInt64Ty(getLLVMContext()); + Ops.push_back(EmitScalarExpr(E->getArg(1))); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], - ConstantAggregateZero::get(Ty)); - return Builder.CreateSExt(Ops[0], Ty, "vtst"); + llvm::Constant::getNullValue(Ty)); + return Builder.CreateSExt(Ops[0], Ty, "vtstd"); + } + case NEON::BI__builtin_neon_vset_lane_i8: + case NEON::BI__builtin_neon_vset_lane_i16: + case NEON::BI__builtin_neon_vset_lane_i32: + case NEON::BI__builtin_neon_vset_lane_i64: + case NEON::BI__builtin_neon_vset_lane_f32: + case NEON::BI__builtin_neon_vsetq_lane_i8: + case NEON::BI__builtin_neon_vsetq_lane_i16: + case NEON::BI__builtin_neon_vsetq_lane_i32: + case NEON::BI__builtin_neon_vsetq_lane_i64: + case NEON::BI__builtin_neon_vsetq_lane_f32: + Ops.push_back(EmitScalarExpr(E->getArg(2))); + return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); + case NEON::BI__builtin_neon_vset_lane_f64: + // The vector type needs a cast for the v1f64 variant. + Ops[1] = Builder.CreateBitCast(Ops[1], + llvm::VectorType::get(DoubleTy, 1)); + Ops.push_back(EmitScalarExpr(E->getArg(2))); + return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); + case NEON::BI__builtin_neon_vsetq_lane_f64: + // The vector type needs a cast for the v2f64 variant. 
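A usage sketch of the case that comment introduces, assuming an AArch64 target with <arm_neon.h>; the bitcast just below supplies the v2f64 vector type before an ordinary insertelement:

#include <arm_neon.h>

float64x2_t set_high(float64x2_t v, double x) {
  return vsetq_lane_f64(x, v, 1);   // bitcast to <2 x double>, then insertelement
}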
+ Ops[1] = Builder.CreateBitCast(Ops[1], + llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2)); + Ops.push_back(EmitScalarExpr(E->getArg(2))); + return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); + + case NEON::BI__builtin_neon_vget_lane_i8: + case NEON::BI__builtin_neon_vdupb_lane_i8: + Ops[0] = Builder.CreateBitCast(Ops[0], + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8)); + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vget_lane"); + case NEON::BI__builtin_neon_vgetq_lane_i8: + case NEON::BI__builtin_neon_vdupb_laneq_i8: + Ops[0] = Builder.CreateBitCast(Ops[0], + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16)); + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vgetq_lane"); + case NEON::BI__builtin_neon_vget_lane_i16: + case NEON::BI__builtin_neon_vduph_lane_i16: + Ops[0] = Builder.CreateBitCast(Ops[0], + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4)); + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vget_lane"); + case NEON::BI__builtin_neon_vgetq_lane_i16: + case NEON::BI__builtin_neon_vduph_laneq_i16: + Ops[0] = Builder.CreateBitCast(Ops[0], + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8)); + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vgetq_lane"); + case NEON::BI__builtin_neon_vget_lane_i32: + case NEON::BI__builtin_neon_vdups_lane_i32: + Ops[0] = Builder.CreateBitCast( + Ops[0], + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 32), 2)); + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vget_lane"); + case NEON::BI__builtin_neon_vdups_lane_f32: + Ops[0] = Builder.CreateBitCast(Ops[0], + llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2)); + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vdups_lane"); + case NEON::BI__builtin_neon_vgetq_lane_i32: + case NEON::BI__builtin_neon_vdups_laneq_i32: + Ops[0] = Builder.CreateBitCast(Ops[0], + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 32), 4)); + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vgetq_lane"); + case NEON::BI__builtin_neon_vget_lane_i64: + case NEON::BI__builtin_neon_vdupd_lane_i64: + Ops[0] = Builder.CreateBitCast(Ops[0], + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 64), 1)); + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vget_lane"); + case NEON::BI__builtin_neon_vdupd_lane_f64: + Ops[0] = Builder.CreateBitCast(Ops[0], + llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 1)); + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vdupd_lane"); + case NEON::BI__builtin_neon_vgetq_lane_i64: + case NEON::BI__builtin_neon_vdupd_laneq_i64: + Ops[0] = Builder.CreateBitCast(Ops[0], + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 64), 2)); + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vgetq_lane"); + case NEON::BI__builtin_neon_vget_lane_f32: + Ops[0] = Builder.CreateBitCast(Ops[0], + llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2)); + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vget_lane"); + case NEON::BI__builtin_neon_vget_lane_f64: + Ops[0] = Builder.CreateBitCast(Ops[0], + llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 1)); + 
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vget_lane"); + case NEON::BI__builtin_neon_vgetq_lane_f32: + case NEON::BI__builtin_neon_vdups_laneq_f32: + Ops[0] = Builder.CreateBitCast(Ops[0], + llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 4)); + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vgetq_lane"); + case NEON::BI__builtin_neon_vgetq_lane_f64: + case NEON::BI__builtin_neon_vdupd_laneq_f64: + Ops[0] = Builder.CreateBitCast(Ops[0], + llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2)); + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vgetq_lane"); + case NEON::BI__builtin_neon_vaddd_s64: + case NEON::BI__builtin_neon_vaddd_u64: + return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); + case NEON::BI__builtin_neon_vsubd_s64: + case NEON::BI__builtin_neon_vsubd_u64: + return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd"); + case NEON::BI__builtin_neon_vqdmlalh_s16: + case NEON::BI__builtin_neon_vqdmlslh_s16: { + SmallVector<Value *, 2> ProductOps; + ProductOps.push_back(vectorWrapScalar16(Ops[1])); + ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); + llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); + Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), + ProductOps, "vqdmlXl"); + Constant *CI = ConstantInt::get(SizeTy, 0); + Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); + + unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 + ? Intrinsic::aarch64_neon_sqadd + : Intrinsic::aarch64_neon_sqsub; + return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl"); + } + case NEON::BI__builtin_neon_vqshlud_n_s64: { + Ops.push_back(EmitScalarExpr(E->getArg(1))); + Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty), + Ops, "vqshlu_n"); + } + case NEON::BI__builtin_neon_vqshld_n_u64: + case NEON::BI__builtin_neon_vqshld_n_s64: { + unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64 + ? Intrinsic::aarch64_neon_uqshl + : Intrinsic::aarch64_neon_sqshl; + Ops.push_back(EmitScalarExpr(E->getArg(1))); + Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); + return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n"); + } + case NEON::BI__builtin_neon_vrshrd_n_u64: + case NEON::BI__builtin_neon_vrshrd_n_s64: { + unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64 + ? Intrinsic::aarch64_neon_urshl + : Intrinsic::aarch64_neon_srshl; + Ops.push_back(EmitScalarExpr(E->getArg(1))); + int SV = cast<ConstantInt>(Ops[1])->getSExtValue(); + Ops[1] = ConstantInt::get(Int64Ty, -SV); + return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n"); + } + case NEON::BI__builtin_neon_vrsrad_n_u64: + case NEON::BI__builtin_neon_vrsrad_n_s64: { + unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64 + ? 
Intrinsic::aarch64_neon_urshl + : Intrinsic::aarch64_neon_srshl; + Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); + Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2)))); + Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Int64Ty), Ops[1], + Builder.CreateSExt(Ops[2], Int64Ty)); + return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty)); + } + case NEON::BI__builtin_neon_vshld_n_s64: + case NEON::BI__builtin_neon_vshld_n_u64: { + llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); + return Builder.CreateShl( + Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n"); + } + case NEON::BI__builtin_neon_vshrd_n_s64: { + llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); + return Builder.CreateAShr( + Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), + Amt->getZExtValue())), + "shrd_n"); + } + case NEON::BI__builtin_neon_vshrd_n_u64: { + llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); + uint64_t ShiftAmt = Amt->getZExtValue(); + // Right-shifting an unsigned value by its size yields 0. + if (ShiftAmt == 64) + return ConstantInt::get(Int64Ty, 0); + return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt), + "shrd_n"); + } + case NEON::BI__builtin_neon_vsrad_n_s64: { + llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); + Ops[1] = Builder.CreateAShr( + Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), + Amt->getZExtValue())), + "shrd_n"); + return Builder.CreateAdd(Ops[0], Ops[1]); + } + case NEON::BI__builtin_neon_vsrad_n_u64: { + llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); + uint64_t ShiftAmt = Amt->getZExtValue(); + // Right-shifting an unsigned value by its size yields 0. + // As Op + 0 = Op, return Ops[0] directly. + if (ShiftAmt == 64) + return Ops[0]; + Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt), + "shrd_n"); + return Builder.CreateAdd(Ops[0], Ops[1]); + } + case NEON::BI__builtin_neon_vqdmlalh_lane_s16: + case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: + case NEON::BI__builtin_neon_vqdmlslh_lane_s16: + case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { + Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), + "lane"); + SmallVector<Value *, 2> ProductOps; + ProductOps.push_back(vectorWrapScalar16(Ops[1])); + ProductOps.push_back(vectorWrapScalar16(Ops[2])); + llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); + Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), + ProductOps, "vqdmlXl"); + Constant *CI = ConstantInt::get(SizeTy, 0); + Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); + Ops.pop_back(); + + unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 || + BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16) + ? Intrinsic::aarch64_neon_sqadd + : Intrinsic::aarch64_neon_sqsub; + return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl"); + } + case NEON::BI__builtin_neon_vqdmlals_s32: + case NEON::BI__builtin_neon_vqdmlsls_s32: { + SmallVector<Value *, 2> ProductOps; + ProductOps.push_back(Ops[1]); + ProductOps.push_back(EmitScalarExpr(E->getArg(2))); + Ops[1] = + EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), + ProductOps, "vqdmlXl"); + + unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32 + ? 
Intrinsic::aarch64_neon_sqadd + : Intrinsic::aarch64_neon_sqsub; + return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl"); + } + case NEON::BI__builtin_neon_vqdmlals_lane_s32: + case NEON::BI__builtin_neon_vqdmlals_laneq_s32: + case NEON::BI__builtin_neon_vqdmlsls_lane_s32: + case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { + Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), + "lane"); + SmallVector<Value *, 2> ProductOps; + ProductOps.push_back(Ops[1]); + ProductOps.push_back(Ops[2]); + Ops[1] = + EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), + ProductOps, "vqdmlXl"); + Ops.pop_back(); + + unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 || + BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32) + ? Intrinsic::aarch64_neon_sqadd + : Intrinsic::aarch64_neon_sqsub; + return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); + } + } + + llvm::VectorType *VTy = GetNeonType(this, Type); + llvm::Type *Ty = VTy; + if (!Ty) + return nullptr; + + // Not all intrinsics handled by the common case work for AArch64 yet, so only + // defer to common code if it's been added to our special map. + Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, + AArch64SIMDIntrinsicsProvenSorted); + + if (Builtin) + return EmitCommonNeonBuiltinExpr( + Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, + Builtin->NameHint, Builtin->TypeModifier, E, Ops, nullptr); + + if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops)) + return V; + + unsigned Int; + switch (BuiltinID) { + default: return nullptr; + case NEON::BI__builtin_neon_vbsl_v: + case NEON::BI__builtin_neon_vbslq_v: { + llvm::Type *BitTy = llvm::VectorType::getInteger(VTy); + Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl"); + Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl"); + Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl"); + + Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl"); + Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl"); + Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl"); + return Builder.CreateBitCast(Ops[0], Ty); + } + case NEON::BI__builtin_neon_vfma_lane_v: + case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types + // The ARM builtins (and instructions) have the addend as the first + // operand, but the 'fma' intrinsics have it last. Swap it around here. + Value *Addend = Ops[0]; + Value *Multiplicand = Ops[1]; + Value *LaneSource = Ops[2]; + Ops[0] = Multiplicand; + Ops[1] = LaneSource; + Ops[2] = Addend; + + // Now adjust things to handle the lane access. + llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ? 
+ llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) : + VTy; + llvm::Constant *cst = cast<Constant>(Ops[3]); + Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst); + Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); + Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); + + Ops.pop_back(); + Int = Intrinsic::fma; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); + } + case NEON::BI__builtin_neon_vfma_laneq_v: { + llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); + // v1f64 fma should be mapped to Neon scalar f64 fma + if (VTy && VTy->getElementType() == DoubleTy) { + Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); + Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); + llvm::Type *VTy = GetNeonType(this, + NeonTypeFlags(NeonTypeFlags::Float64, false, true)); + Ops[2] = Builder.CreateBitCast(Ops[2], VTy); + Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); + Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); + Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); + return Builder.CreateBitCast(Result, Ty); + } + Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + + llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), + VTy->getNumElements() * 2); + Ops[2] = Builder.CreateBitCast(Ops[2], STy); + Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), + cast<ConstantInt>(Ops[3])); + Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); + + return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); + } + case NEON::BI__builtin_neon_vfmaq_laneq_v: { + Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + + Ops[2] = Builder.CreateBitCast(Ops[2], Ty); + Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); + return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); + } + case NEON::BI__builtin_neon_vfmas_lane_f32: + case NEON::BI__builtin_neon_vfmas_laneq_f32: + case NEON::BI__builtin_neon_vfmad_lane_f64: + case NEON::BI__builtin_neon_vfmad_laneq_f64: { + Ops.push_back(EmitScalarExpr(E->getArg(3))); + llvm::Type *Ty = ConvertType(E->getCallReturnType()); + Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); + return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); + } + case NEON::BI__builtin_neon_vfms_v: + case NEON::BI__builtin_neon_vfmsq_v: { // Only used for FP types + // FIXME: probably remove when we no longer support aarch64_simd.h + // (arm_neon.h delegates to vfma). + + // The ARM builtins (and instructions) have the addend as the first + // operand, but the 'fma' intrinsics have it last. Swap it around here. + Value *Subtrahend = Ops[0]; + Value *Multiplicand = Ops[2]; + Ops[0] = Multiplicand; + Ops[2] = Subtrahend; + Ops[1] = Builder.CreateBitCast(Ops[1], VTy); + Ops[1] = Builder.CreateFNeg(Ops[1]); + Int = Intrinsic::fma; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmls"); + } + case NEON::BI__builtin_neon_vmull_v: + // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. + Int = usgn ? 
Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; + if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); + case NEON::BI__builtin_neon_vmax_v: + case NEON::BI__builtin_neon_vmaxq_v: + // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. + Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax; + if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); + case NEON::BI__builtin_neon_vmin_v: + case NEON::BI__builtin_neon_vminq_v: + // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. + Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin; + if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); + case NEON::BI__builtin_neon_vabd_v: + case NEON::BI__builtin_neon_vabdq_v: + // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. + Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd; + if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); + case NEON::BI__builtin_neon_vpadal_v: + case NEON::BI__builtin_neon_vpadalq_v: { + unsigned ArgElts = VTy->getNumElements(); + llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType()); + unsigned BitWidth = EltTy->getBitWidth(); + llvm::Type *ArgTy = llvm::VectorType::get( + llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts); + llvm::Type* Tys[2] = { VTy, ArgTy }; + Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp; + SmallVector<llvm::Value*, 1> TmpOps; + TmpOps.push_back(Ops[1]); + Function *F = CGM.getIntrinsic(Int, Tys); + llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal"); + llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType()); + return Builder.CreateAdd(tmp, addend); + } + case NEON::BI__builtin_neon_vpmin_v: + case NEON::BI__builtin_neon_vpminq_v: + // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. + Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp; + if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); + case NEON::BI__builtin_neon_vpmax_v: + case NEON::BI__builtin_neon_vpmaxq_v: + // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. + Int = usgn ? 
Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp; + if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); + case NEON::BI__builtin_neon_vminnm_v: + case NEON::BI__builtin_neon_vminnmq_v: + Int = Intrinsic::aarch64_neon_fminnm; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); + case NEON::BI__builtin_neon_vmaxnm_v: + case NEON::BI__builtin_neon_vmaxnmq_v: + Int = Intrinsic::aarch64_neon_fmaxnm; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); + case NEON::BI__builtin_neon_vrecpss_f32: { + llvm::Type *f32Type = llvm::Type::getFloatTy(getLLVMContext()); + Ops.push_back(EmitScalarExpr(E->getArg(1))); + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, f32Type), + Ops, "vrecps"); + } + case NEON::BI__builtin_neon_vrecpsd_f64: { + llvm::Type *f64Type = llvm::Type::getDoubleTy(getLLVMContext()); + Ops.push_back(EmitScalarExpr(E->getArg(1))); + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, f64Type), + Ops, "vrecps"); + } + case NEON::BI__builtin_neon_vrshr_n_v: + case NEON::BI__builtin_neon_vrshrq_n_v: + // FIXME: this can be shared with 32-bit ARM, but not AArch64 at the + // moment. After the final merge it should be added to + // EmitCommonNeonBuiltinExpr. + Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true); + case NEON::BI__builtin_neon_vqshlu_n_v: + case NEON::BI__builtin_neon_vqshluq_n_v: + // FIXME: AArch64 and ARM use different intrinsics for this, but are + // essentially compatible. It should be in EmitCommonNeonBuiltinExpr after + // the final merge. + Int = Intrinsic::aarch64_neon_sqshlu; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n", 1, false); + case NEON::BI__builtin_neon_vqshrun_n_v: + // FIXME: as above + Int = Intrinsic::aarch64_neon_sqshrun; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); + case NEON::BI__builtin_neon_vqrshrun_n_v: + // FIXME: and again. + Int = Intrinsic::aarch64_neon_sqrshrun; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); + case NEON::BI__builtin_neon_vqshrn_n_v: + // FIXME: guess + Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); + case NEON::BI__builtin_neon_vrshrn_n_v: + // FIXME: there might be a pattern here. + Int = Intrinsic::aarch64_neon_rshrn; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); + case NEON::BI__builtin_neon_vqrshrn_n_v: + // FIXME: another one + Int = usgn ? 
Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); + case NEON::BI__builtin_neon_vrnda_v: + case NEON::BI__builtin_neon_vrndaq_v: { + Int = Intrinsic::round; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); + } + case NEON::BI__builtin_neon_vrndi_v: + case NEON::BI__builtin_neon_vrndiq_v: { + Int = Intrinsic::nearbyint; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); + } + case NEON::BI__builtin_neon_vrndm_v: + case NEON::BI__builtin_neon_vrndmq_v: { + Int = Intrinsic::floor; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); + } + case NEON::BI__builtin_neon_vrndn_v: + case NEON::BI__builtin_neon_vrndnq_v: { + Int = Intrinsic::aarch64_neon_frintn; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); + } + case NEON::BI__builtin_neon_vrndp_v: + case NEON::BI__builtin_neon_vrndpq_v: { + Int = Intrinsic::ceil; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); + } + case NEON::BI__builtin_neon_vrndx_v: + case NEON::BI__builtin_neon_vrndxq_v: { + Int = Intrinsic::rint; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); + } + case NEON::BI__builtin_neon_vrnd_v: + case NEON::BI__builtin_neon_vrndq_v: { + Int = Intrinsic::trunc; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); + } + case NEON::BI__builtin_neon_vceqz_v: + case NEON::BI__builtin_neon_vceqzq_v: + return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, + ICmpInst::ICMP_EQ, "vceqz"); + case NEON::BI__builtin_neon_vcgez_v: + case NEON::BI__builtin_neon_vcgezq_v: + return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, + ICmpInst::ICMP_SGE, "vcgez"); + case NEON::BI__builtin_neon_vclez_v: + case NEON::BI__builtin_neon_vclezq_v: + return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, + ICmpInst::ICMP_SLE, "vclez"); + case NEON::BI__builtin_neon_vcgtz_v: + case NEON::BI__builtin_neon_vcgtzq_v: + return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, + ICmpInst::ICMP_SGT, "vcgtz"); + case NEON::BI__builtin_neon_vcltz_v: + case NEON::BI__builtin_neon_vcltzq_v: + return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, + ICmpInst::ICMP_SLT, "vcltz"); + case NEON::BI__builtin_neon_vcvt_f64_v: + case NEON::BI__builtin_neon_vcvtq_f64_v: + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); + return usgn ? 
Builder.CreateUIToFP(Ops[0], Ty, "vcvt") + : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); + case NEON::BI__builtin_neon_vcvt_f64_f32: { + assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && + "unexpected vcvt_f64_f32 builtin"); + NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); + Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); + + return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); + } + case NEON::BI__builtin_neon_vcvt_f32_f64: { + assert(Type.getEltType() == NeonTypeFlags::Float32 && + "unexpected vcvt_f32_f64 builtin"); + NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); + Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); + + return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); + } + case NEON::BI__builtin_neon_vcvt_s32_v: + case NEON::BI__builtin_neon_vcvt_u32_v: + case NEON::BI__builtin_neon_vcvt_s64_v: + case NEON::BI__builtin_neon_vcvt_u64_v: + case NEON::BI__builtin_neon_vcvtq_s32_v: + case NEON::BI__builtin_neon_vcvtq_u32_v: + case NEON::BI__builtin_neon_vcvtq_s64_v: + case NEON::BI__builtin_neon_vcvtq_u64_v: { + bool Double = + (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); + llvm::Type *InTy = + GetNeonType(this, + NeonTypeFlags(Double ? NeonTypeFlags::Float64 + : NeonTypeFlags::Float32, false, quad)); + Ops[0] = Builder.CreateBitCast(Ops[0], InTy); + if (usgn) + return Builder.CreateFPToUI(Ops[0], Ty); + return Builder.CreateFPToSI(Ops[0], Ty); + } + case NEON::BI__builtin_neon_vcvta_s32_v: + case NEON::BI__builtin_neon_vcvtaq_s32_v: + case NEON::BI__builtin_neon_vcvta_u32_v: + case NEON::BI__builtin_neon_vcvtaq_u32_v: + case NEON::BI__builtin_neon_vcvta_s64_v: + case NEON::BI__builtin_neon_vcvtaq_s64_v: + case NEON::BI__builtin_neon_vcvta_u64_v: + case NEON::BI__builtin_neon_vcvtaq_u64_v: { + Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas; + bool Double = + (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); + llvm::Type *InTy = + GetNeonType(this, + NeonTypeFlags(Double ? NeonTypeFlags::Float64 + : NeonTypeFlags::Float32, false, quad)); + llvm::Type *Tys[2] = { Ty, InTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); + } + case NEON::BI__builtin_neon_vcvtm_s32_v: + case NEON::BI__builtin_neon_vcvtmq_s32_v: + case NEON::BI__builtin_neon_vcvtm_u32_v: + case NEON::BI__builtin_neon_vcvtmq_u32_v: + case NEON::BI__builtin_neon_vcvtm_s64_v: + case NEON::BI__builtin_neon_vcvtmq_s64_v: + case NEON::BI__builtin_neon_vcvtm_u64_v: + case NEON::BI__builtin_neon_vcvtmq_u64_v: { + Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms; + bool Double = + (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); + llvm::Type *InTy = + GetNeonType(this, + NeonTypeFlags(Double ? NeonTypeFlags::Float64 + : NeonTypeFlags::Float32, false, quad)); + llvm::Type *Tys[2] = { Ty, InTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); + } + case NEON::BI__builtin_neon_vcvtn_s32_v: + case NEON::BI__builtin_neon_vcvtnq_s32_v: + case NEON::BI__builtin_neon_vcvtn_u32_v: + case NEON::BI__builtin_neon_vcvtnq_u32_v: + case NEON::BI__builtin_neon_vcvtn_s64_v: + case NEON::BI__builtin_neon_vcvtnq_s64_v: + case NEON::BI__builtin_neon_vcvtn_u64_v: + case NEON::BI__builtin_neon_vcvtnq_u64_v: { + Int = usgn ? 
Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns; + bool Double = + (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); + llvm::Type *InTy = + GetNeonType(this, + NeonTypeFlags(Double ? NeonTypeFlags::Float64 + : NeonTypeFlags::Float32, false, quad)); + llvm::Type *Tys[2] = { Ty, InTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); + } + case NEON::BI__builtin_neon_vcvtp_s32_v: + case NEON::BI__builtin_neon_vcvtpq_s32_v: + case NEON::BI__builtin_neon_vcvtp_u32_v: + case NEON::BI__builtin_neon_vcvtpq_u32_v: + case NEON::BI__builtin_neon_vcvtp_s64_v: + case NEON::BI__builtin_neon_vcvtpq_s64_v: + case NEON::BI__builtin_neon_vcvtp_u64_v: + case NEON::BI__builtin_neon_vcvtpq_u64_v: { + Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps; + bool Double = + (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); + llvm::Type *InTy = + GetNeonType(this, + NeonTypeFlags(Double ? NeonTypeFlags::Float64 + : NeonTypeFlags::Float32, false, quad)); + llvm::Type *Tys[2] = { Ty, InTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); + } + case NEON::BI__builtin_neon_vmulx_v: + case NEON::BI__builtin_neon_vmulxq_v: { + Int = Intrinsic::aarch64_neon_fmulx; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); + } + case NEON::BI__builtin_neon_vmul_lane_v: + case NEON::BI__builtin_neon_vmul_laneq_v: { + // v1f64 vmul_lane should be mapped to Neon scalar mul lane + bool Quad = false; + if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) + Quad = true; + Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); + llvm::Type *VTy = GetNeonType(this, + NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); + Ops[1] = Builder.CreateBitCast(Ops[1], VTy); + Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); + Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); + return Builder.CreateBitCast(Result, Ty); + } + case NEON::BI__builtin_neon_vnegd_s64: + return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); + case NEON::BI__builtin_neon_vpmaxnm_v: + case NEON::BI__builtin_neon_vpmaxnmq_v: { + Int = Intrinsic::aarch64_neon_fmaxnmp; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); + } + case NEON::BI__builtin_neon_vpminnm_v: + case NEON::BI__builtin_neon_vpminnmq_v: { + Int = Intrinsic::aarch64_neon_fminnmp; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); + } + case NEON::BI__builtin_neon_vsqrt_v: + case NEON::BI__builtin_neon_vsqrtq_v: { + Int = Intrinsic::sqrt; + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); + } + case NEON::BI__builtin_neon_vrbit_v: + case NEON::BI__builtin_neon_vrbitq_v: { + Int = Intrinsic::aarch64_neon_rbit; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); + } + case NEON::BI__builtin_neon_vaddv_u8: + // FIXME: These are handled by the AArch64 scalar code. + usgn = true; + // FALLTHROUGH + case NEON::BI__builtin_neon_vaddv_s8: { + Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 8)); + } + case NEON::BI__builtin_neon_vaddv_u16: + usgn = true; + // FALLTHROUGH + case NEON::BI__builtin_neon_vaddv_s16: { + Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 16)); + } + case NEON::BI__builtin_neon_vaddvq_u8: + usgn = true; + // FALLTHROUGH + case NEON::BI__builtin_neon_vaddvq_s8: { + Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 8)); + } + case NEON::BI__builtin_neon_vaddvq_u16: + usgn = true; + // FALLTHROUGH + case NEON::BI__builtin_neon_vaddvq_s16: { + Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 16)); + } + case NEON::BI__builtin_neon_vmaxv_u8: { + Int = Intrinsic::aarch64_neon_umaxv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 8)); + } + case NEON::BI__builtin_neon_vmaxv_u16: { + Int = Intrinsic::aarch64_neon_umaxv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 16)); + } + case NEON::BI__builtin_neon_vmaxvq_u8: { + Int = Intrinsic::aarch64_neon_umaxv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 8)); + } + case NEON::BI__builtin_neon_vmaxvq_u16: { 
+ Int = Intrinsic::aarch64_neon_umaxv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 16)); + } + case NEON::BI__builtin_neon_vmaxv_s8: { + Int = Intrinsic::aarch64_neon_smaxv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 8)); + } + case NEON::BI__builtin_neon_vmaxv_s16: { + Int = Intrinsic::aarch64_neon_smaxv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 16)); + } + case NEON::BI__builtin_neon_vmaxvq_s8: { + Int = Intrinsic::aarch64_neon_smaxv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 8)); + } + case NEON::BI__builtin_neon_vmaxvq_s16: { + Int = Intrinsic::aarch64_neon_smaxv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 16)); + } + case NEON::BI__builtin_neon_vminv_u8: { + Int = Intrinsic::aarch64_neon_uminv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 8)); + } + case NEON::BI__builtin_neon_vminv_u16: { + Int = Intrinsic::aarch64_neon_uminv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 16)); + } + case NEON::BI__builtin_neon_vminvq_u8: { + Int = Intrinsic::aarch64_neon_uminv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); + return Builder.CreateTrunc(Ops[0], 
+ llvm::IntegerType::get(getLLVMContext(), 8)); + } + case NEON::BI__builtin_neon_vminvq_u16: { + Int = Intrinsic::aarch64_neon_uminv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 16)); + } + case NEON::BI__builtin_neon_vminv_s8: { + Int = Intrinsic::aarch64_neon_sminv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 8)); + } + case NEON::BI__builtin_neon_vminv_s16: { + Int = Intrinsic::aarch64_neon_sminv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 16)); + } + case NEON::BI__builtin_neon_vminvq_s8: { + Int = Intrinsic::aarch64_neon_sminv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 8)); + } + case NEON::BI__builtin_neon_vminvq_s16: { + Int = Intrinsic::aarch64_neon_sminv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 16)); + } + case NEON::BI__builtin_neon_vmul_n_f64: { + Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); + Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); + return Builder.CreateFMul(Ops[0], RHS); + } + case NEON::BI__builtin_neon_vaddlv_u8: { + Int = Intrinsic::aarch64_neon_uaddlv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 16)); + } + case NEON::BI__builtin_neon_vaddlv_u16: { + Int = Intrinsic::aarch64_neon_uaddlv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); + } + case NEON::BI__builtin_neon_vaddlvq_u8: { + Int = Intrinsic::aarch64_neon_uaddlv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + 
llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 16)); + } + case NEON::BI__builtin_neon_vaddlvq_u16: { + Int = Intrinsic::aarch64_neon_uaddlv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); + } + case NEON::BI__builtin_neon_vaddlv_s8: { + Int = Intrinsic::aarch64_neon_saddlv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 16)); + } + case NEON::BI__builtin_neon_vaddlv_s16: { + Int = Intrinsic::aarch64_neon_saddlv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); + } + case NEON::BI__builtin_neon_vaddlvq_s8: { + Int = Intrinsic::aarch64_neon_saddlv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); + return Builder.CreateTrunc(Ops[0], + llvm::IntegerType::get(getLLVMContext(), 16)); + } + case NEON::BI__builtin_neon_vaddlvq_s16: { + Int = Intrinsic::aarch64_neon_saddlv; + Ty = llvm::IntegerType::get(getLLVMContext(), 32); + VTy = + llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); + llvm::Type *Tys[2] = { Ty, VTy }; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); + } + case NEON::BI__builtin_neon_vsri_n_v: + case NEON::BI__builtin_neon_vsriq_n_v: { + Int = Intrinsic::aarch64_neon_vsri; + llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); + return EmitNeonCall(Intrin, Ops, "vsri_n"); + } + case NEON::BI__builtin_neon_vsli_n_v: + case NEON::BI__builtin_neon_vsliq_n_v: { + Int = Intrinsic::aarch64_neon_vsli; + llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); + return EmitNeonCall(Intrin, Ops, "vsli_n"); + } + case NEON::BI__builtin_neon_vsra_n_v: + case NEON::BI__builtin_neon_vsraq_n_v: + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); + return Builder.CreateAdd(Ops[0], Ops[1]); + case NEON::BI__builtin_neon_vrsra_n_v: + case NEON::BI__builtin_neon_vrsraq_n_v: { + Int = usgn ? 
Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl; + SmallVector<llvm::Value*,2> TmpOps; + TmpOps.push_back(Ops[1]); + TmpOps.push_back(Ops[2]); + Function* F = CGM.getIntrinsic(Int, Ty); + llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true); + Ops[0] = Builder.CreateBitCast(Ops[0], VTy); + return Builder.CreateAdd(Ops[0], tmp); + } + // FIXME: Sharing loads & stores with 32-bit is complicated by the absence + // of an Align parameter here. + case NEON::BI__builtin_neon_vld1_x2_v: + case NEON::BI__builtin_neon_vld1q_x2_v: + case NEON::BI__builtin_neon_vld1_x3_v: + case NEON::BI__builtin_neon_vld1q_x3_v: + case NEON::BI__builtin_neon_vld1_x4_v: + case NEON::BI__builtin_neon_vld1q_x4_v: { + llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); + Ops[1] = Builder.CreateBitCast(Ops[1], PTy); + llvm::Type *Tys[2] = { VTy, PTy }; + unsigned Int; + switch (BuiltinID) { + case NEON::BI__builtin_neon_vld1_x2_v: + case NEON::BI__builtin_neon_vld1q_x2_v: + Int = Intrinsic::aarch64_neon_ld1x2; + break; + case NEON::BI__builtin_neon_vld1_x3_v: + case NEON::BI__builtin_neon_vld1q_x3_v: + Int = Intrinsic::aarch64_neon_ld1x3; + break; + case NEON::BI__builtin_neon_vld1_x4_v: + case NEON::BI__builtin_neon_vld1q_x4_v: + Int = Intrinsic::aarch64_neon_ld1x4; + break; + } + Function *F = CGM.getIntrinsic(Int, Tys); + Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); + Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + return Builder.CreateStore(Ops[1], Ops[0]); + } + case NEON::BI__builtin_neon_vst1_x2_v: + case NEON::BI__builtin_neon_vst1q_x2_v: + case NEON::BI__builtin_neon_vst1_x3_v: + case NEON::BI__builtin_neon_vst1q_x3_v: + case NEON::BI__builtin_neon_vst1_x4_v: + case NEON::BI__builtin_neon_vst1q_x4_v: { + llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); + llvm::Type *Tys[2] = { VTy, PTy }; + unsigned Int; + switch (BuiltinID) { + case NEON::BI__builtin_neon_vst1_x2_v: + case NEON::BI__builtin_neon_vst1q_x2_v: + Int = Intrinsic::aarch64_neon_st1x2; + break; + case NEON::BI__builtin_neon_vst1_x3_v: + case NEON::BI__builtin_neon_vst1q_x3_v: + Int = Intrinsic::aarch64_neon_st1x3; + break; + case NEON::BI__builtin_neon_vst1_x4_v: + case NEON::BI__builtin_neon_vst1q_x4_v: + Int = Intrinsic::aarch64_neon_st1x4; + break; + } + SmallVector<Value *, 4> IntOps(Ops.begin()+1, Ops.end()); + IntOps.push_back(Ops[0]); + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), IntOps, ""); + } + case NEON::BI__builtin_neon_vld1_v: + case NEON::BI__builtin_neon_vld1q_v: + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); + return Builder.CreateLoad(Ops[0]); + case NEON::BI__builtin_neon_vst1_v: + case NEON::BI__builtin_neon_vst1q_v: + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); + Ops[1] = Builder.CreateBitCast(Ops[1], VTy); + return Builder.CreateStore(Ops[1], Ops[0]); + case NEON::BI__builtin_neon_vld1_lane_v: + case NEON::BI__builtin_neon_vld1q_lane_v: + Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + Ty = llvm::PointerType::getUnqual(VTy->getElementType()); + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + Ops[0] = Builder.CreateLoad(Ops[0]); + return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); + case NEON::BI__builtin_neon_vld1_dup_v: + case NEON::BI__builtin_neon_vld1q_dup_v: { + Value *V = UndefValue::get(Ty); + Ty = llvm::PointerType::getUnqual(VTy->getElementType()); + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + Ops[0] = 
Builder.CreateLoad(Ops[0]); + llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); + Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); + return EmitNeonSplat(Ops[0], CI); + } + case NEON::BI__builtin_neon_vst1_lane_v: + case NEON::BI__builtin_neon_vst1q_lane_v: + Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); + Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); + return Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty)); + case NEON::BI__builtin_neon_vld2_v: + case NEON::BI__builtin_neon_vld2q_v: { + llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); + Ops[1] = Builder.CreateBitCast(Ops[1], PTy); + llvm::Type *Tys[2] = { VTy, PTy }; + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); + Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); + Ops[0] = Builder.CreateBitCast(Ops[0], + llvm::PointerType::getUnqual(Ops[1]->getType())); + return Builder.CreateStore(Ops[1], Ops[0]); + } + case NEON::BI__builtin_neon_vld3_v: + case NEON::BI__builtin_neon_vld3q_v: { + llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); + Ops[1] = Builder.CreateBitCast(Ops[1], PTy); + llvm::Type *Tys[2] = { VTy, PTy }; + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); + Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); + Ops[0] = Builder.CreateBitCast(Ops[0], + llvm::PointerType::getUnqual(Ops[1]->getType())); + return Builder.CreateStore(Ops[1], Ops[0]); + } + case NEON::BI__builtin_neon_vld4_v: + case NEON::BI__builtin_neon_vld4q_v: { + llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); + Ops[1] = Builder.CreateBitCast(Ops[1], PTy); + llvm::Type *Tys[2] = { VTy, PTy }; + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); + Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); + Ops[0] = Builder.CreateBitCast(Ops[0], + llvm::PointerType::getUnqual(Ops[1]->getType())); + return Builder.CreateStore(Ops[1], Ops[0]); } - case ARM::BI__builtin_neon_vtrn_v: - case ARM::BI__builtin_neon_vtrnq_v: { + case NEON::BI__builtin_neon_vld2_dup_v: + case NEON::BI__builtin_neon_vld2q_dup_v: { + llvm::Type *PTy = + llvm::PointerType::getUnqual(VTy->getElementType()); + Ops[1] = Builder.CreateBitCast(Ops[1], PTy); + llvm::Type *Tys[2] = { VTy, PTy }; + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); + Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); + Ops[0] = Builder.CreateBitCast(Ops[0], + llvm::PointerType::getUnqual(Ops[1]->getType())); + return Builder.CreateStore(Ops[1], Ops[0]); + } + case NEON::BI__builtin_neon_vld3_dup_v: + case NEON::BI__builtin_neon_vld3q_dup_v: { + llvm::Type *PTy = + llvm::PointerType::getUnqual(VTy->getElementType()); + Ops[1] = Builder.CreateBitCast(Ops[1], PTy); + llvm::Type *Tys[2] = { VTy, PTy }; + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); + Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); + Ops[0] = Builder.CreateBitCast(Ops[0], + llvm::PointerType::getUnqual(Ops[1]->getType())); + return Builder.CreateStore(Ops[1], Ops[0]); + } + case NEON::BI__builtin_neon_vld4_dup_v: + case NEON::BI__builtin_neon_vld4q_dup_v: { + llvm::Type *PTy = + llvm::PointerType::getUnqual(VTy->getElementType()); + Ops[1] = Builder.CreateBitCast(Ops[1], PTy); + llvm::Type *Tys[2] = { VTy, PTy }; + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); + Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); + Ops[0] = Builder.CreateBitCast(Ops[0], + llvm::PointerType::getUnqual(Ops[1]->getType())); + return Builder.CreateStore(Ops[1], Ops[0]); + } + case 
NEON::BI__builtin_neon_vld2_lane_v: + case NEON::BI__builtin_neon_vld2q_lane_v: { + llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys); + Ops.push_back(Ops[1]); + Ops.erase(Ops.begin()+1); + Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + Ops[2] = Builder.CreateBitCast(Ops[2], Ty); + Ops[3] = Builder.CreateZExt(Ops[3], + llvm::IntegerType::get(getLLVMContext(), 64)); + Ops[1] = Builder.CreateCall(F, + ArrayRef<Value*>(Ops).slice(1), "vld2_lane"); + Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + return Builder.CreateStore(Ops[1], Ops[0]); + } + case NEON::BI__builtin_neon_vld3_lane_v: + case NEON::BI__builtin_neon_vld3q_lane_v: { + llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys); + Ops.push_back(Ops[1]); + Ops.erase(Ops.begin()+1); + Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + Ops[2] = Builder.CreateBitCast(Ops[2], Ty); + Ops[3] = Builder.CreateBitCast(Ops[3], Ty); + Ops[4] = Builder.CreateZExt(Ops[4], + llvm::IntegerType::get(getLLVMContext(), 64)); + Ops[1] = Builder.CreateCall(F, + ArrayRef<Value*>(Ops).slice(1), "vld3_lane"); + Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + return Builder.CreateStore(Ops[1], Ops[0]); + } + case NEON::BI__builtin_neon_vld4_lane_v: + case NEON::BI__builtin_neon_vld4q_lane_v: { + llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys); + Ops.push_back(Ops[1]); + Ops.erase(Ops.begin()+1); + Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + Ops[2] = Builder.CreateBitCast(Ops[2], Ty); + Ops[3] = Builder.CreateBitCast(Ops[3], Ty); + Ops[4] = Builder.CreateBitCast(Ops[4], Ty); + Ops[5] = Builder.CreateZExt(Ops[5], + llvm::IntegerType::get(getLLVMContext(), 64)); + Ops[1] = Builder.CreateCall(F, + ArrayRef<Value*>(Ops).slice(1), "vld4_lane"); + Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + return Builder.CreateStore(Ops[1], Ops[0]); + } + case NEON::BI__builtin_neon_vst2_v: + case NEON::BI__builtin_neon_vst2q_v: { + Ops.push_back(Ops[0]); + Ops.erase(Ops.begin()); + llvm::Type *Tys[2] = { VTy, Ops[2]->getType() }; + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys), + Ops, ""); + } + case NEON::BI__builtin_neon_vst2_lane_v: + case NEON::BI__builtin_neon_vst2q_lane_v: { + Ops.push_back(Ops[0]); + Ops.erase(Ops.begin()); + Ops[2] = Builder.CreateZExt(Ops[2], + llvm::IntegerType::get(getLLVMContext(), 64)); + llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), + Ops, ""); + } + case NEON::BI__builtin_neon_vst3_v: + case NEON::BI__builtin_neon_vst3q_v: { + Ops.push_back(Ops[0]); + Ops.erase(Ops.begin()); + llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys), + Ops, ""); + } + case NEON::BI__builtin_neon_vst3_lane_v: + case NEON::BI__builtin_neon_vst3q_lane_v: { + Ops.push_back(Ops[0]); + Ops.erase(Ops.begin()); + Ops[3] = Builder.CreateZExt(Ops[3], + llvm::IntegerType::get(getLLVMContext(), 64)); + llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), + Ops, ""); + } + case NEON::BI__builtin_neon_vst4_v: + case NEON::BI__builtin_neon_vst4q_v: { + Ops.push_back(Ops[0]); + 
Ops.erase(Ops.begin()); + llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys), + Ops, ""); + } + case NEON::BI__builtin_neon_vst4_lane_v: + case NEON::BI__builtin_neon_vst4q_lane_v: { + Ops.push_back(Ops[0]); + Ops.erase(Ops.begin()); + Ops[4] = Builder.CreateZExt(Ops[4], + llvm::IntegerType::get(getLLVMContext(), 64)); + llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), + Ops, ""); + } + case NEON::BI__builtin_neon_vtrn_v: + case NEON::BI__builtin_neon_vtrnq_v: { Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); - Value *SV = 0; + Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { SmallVector<Constant*, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { - Indices.push_back(Builder.getInt32(i+vi)); - Indices.push_back(Builder.getInt32(i+e+vi)); + Indices.push_back(ConstantInt::get(Int32Ty, i+vi)); + Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi)); } Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); SV = llvm::ConstantVector::get(Indices); @@ -4875,12 +5550,12 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } return SV; } - case ARM::BI__builtin_neon_vuzp_v: - case ARM::BI__builtin_neon_vuzpq_v: { + case NEON::BI__builtin_neon_vuzp_v: + case NEON::BI__builtin_neon_vuzpq_v: { Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); - Value *SV = 0; + Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { SmallVector<Constant*, 16> Indices; @@ -4894,12 +5569,12 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } return SV; } - case ARM::BI__builtin_neon_vzip_v: - case ARM::BI__builtin_neon_vzipq_v: { + case NEON::BI__builtin_neon_vzip_v: + case NEON::BI__builtin_neon_vzipq_v: { Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); - Value *SV = 0; + Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { SmallVector<Constant*, 16> Indices; @@ -4914,6 +5589,48 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } return SV; } + case NEON::BI__builtin_neon_vqtbl1q_v: { + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty), + Ops, "vtbl1"); + } + case NEON::BI__builtin_neon_vqtbl2q_v: { + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty), + Ops, "vtbl2"); + } + case NEON::BI__builtin_neon_vqtbl3q_v: { + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty), + Ops, "vtbl3"); + } + case NEON::BI__builtin_neon_vqtbl4q_v: { + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty), + Ops, "vtbl4"); + } + case NEON::BI__builtin_neon_vqtbx1q_v: { + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty), + Ops, "vtbx1"); + } + case NEON::BI__builtin_neon_vqtbx2q_v: { + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty), + Ops, "vtbx2"); + } + case NEON::BI__builtin_neon_vqtbx3q_v: { + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty), + Ops, "vtbx3"); + } + case NEON::BI__builtin_neon_vqtbx4q_v: { + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty), + Ops, "vtbx4"); + } + case 
NEON::BI__builtin_neon_vsqadd_v: + case NEON::BI__builtin_neon_vsqaddq_v: { + Int = Intrinsic::aarch64_neon_usqadd; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); + } + case NEON::BI__builtin_neon_vuqadd_v: + case NEON::BI__builtin_neon_vuqaddq_v: { + Int = Intrinsic::aarch64_neon_suqadd; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); + } } } @@ -4969,7 +5686,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } switch (BuiltinID) { - default: return 0; + default: return nullptr; + case X86::BI_mm_prefetch: { + Value *Address = EmitScalarExpr(E->getArg(0)); + Value *RW = ConstantInt::get(Int32Ty, 0); + Value *Locality = EmitScalarExpr(E->getArg(1)); + Value *Data = ConstantInt::get(Int32Ty, 1); + Value *F = CGM.getIntrinsic(Intrinsic::prefetch); + return Builder.CreateCall4(F, Address, RW, Locality, Data); + } case X86::BI__builtin_ia32_vec_init_v8qi: case X86::BI__builtin_ia32_vec_init_v4hi: case X86::BI__builtin_ia32_vec_init_v2si: @@ -5000,7 +5725,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // extract (0, 1) unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1; - llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index); + llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index); Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract"); // cast pointer to i64 & store @@ -5138,7 +5863,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // 3DNow! case X86::BI__builtin_ia32_pswapdsf: case X86::BI__builtin_ia32_pswapdsi: { - const char *name = 0; + const char *name = nullptr; Intrinsic::ID ID = Intrinsic::not_intrinsic; switch(BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); @@ -5207,7 +5932,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Intrinsic::ID ID = Intrinsic::not_intrinsic; switch (BuiltinID) { - default: return 0; + default: return nullptr; // vec_ld, vec_lvsl, vec_lvsr case PPC::BI__builtin_altivec_lvx: @@ -5285,3 +6010,83 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, } } } + +// Emit an intrinsic that has 1 float or double. +static Value *emitUnaryFPBuiltin(CodeGenFunction &CGF, + const CallExpr *E, + unsigned IntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + return CGF.Builder.CreateCall(F, Src0); +} + +// Emit an intrinsic that has 3 float or double operands. +static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF, + const CallExpr *E, + unsigned IntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); + + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + return CGF.Builder.CreateCall3(F, Src0, Src1, Src2); +} + +Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + switch (BuiltinID) { + case R600::BI__builtin_amdgpu_div_scale: + case R600::BI__builtin_amdgpu_div_scalef: { + // Translate from the intrinsics's struct return to the builtin's out + // argument. 
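// As a sketch of the out-parameter translation described above (operand names
// and exact argument types assumed for illustration):
//
//   bool flag;
//   float r = __builtin_amdgpu_div_scalef(num, den, sel, &flag);
//
// The llvm.AMDGPU.div.scale intrinsic returns a {value, i1} pair, so the code
// below calls it, extracts both members, zero-extends the i1 flag, stores it
// through the pointer given as the fourth builtin argument, and returns only
// the scalar value.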
+ + std::pair<llvm::Value *, unsigned> FlagOutPtr + = EmitPointerWithAlignment(E->getArg(3)); + + llvm::Value *X = EmitScalarExpr(E->getArg(0)); + llvm::Value *Y = EmitScalarExpr(E->getArg(1)); + llvm::Value *Z = EmitScalarExpr(E->getArg(2)); + + llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale, + X->getType()); + + llvm::Value *Tmp = Builder.CreateCall3(Callee, X, Y, Z); + + llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0); + llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1); + + llvm::Type *RealFlagType + = FlagOutPtr.first->getType()->getPointerElementType(); + + llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType); + llvm::StoreInst *FlagStore = Builder.CreateStore(FlagExt, FlagOutPtr.first); + FlagStore->setAlignment(FlagOutPtr.second); + return Result; + } + case R600::BI__builtin_amdgpu_div_fmas: + case R600::BI__builtin_amdgpu_div_fmasf: + return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fmas); + case R600::BI__builtin_amdgpu_div_fixup: + case R600::BI__builtin_amdgpu_div_fixupf: + return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup); + case R600::BI__builtin_amdgpu_trig_preop: + case R600::BI__builtin_amdgpu_trig_preopf: { + Value *Src0 = EmitScalarExpr(E->getArg(0)); + Value *Src1 = EmitScalarExpr(E->getArg(1)); + Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_trig_preop, Src0->getType()); + return Builder.CreateCall2(F, Src0, Src1); + } + case R600::BI__builtin_amdgpu_rcp: + case R600::BI__builtin_amdgpu_rcpf: + return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp); + case R600::BI__builtin_amdgpu_rsq: + case R600::BI__builtin_amdgpu_rsqf: + return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq); + case R600::BI__builtin_amdgpu_rsq_clamped: + case R600::BI__builtin_amdgpu_rsq_clampedf: + return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped); + default: + return nullptr; + } +} diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp index 0ebf1aaa44b8..fb11751fc91e 100644 --- a/lib/CodeGen/CGCUDANV.cpp +++ b/lib/CodeGen/CGCUDANV.cpp @@ -17,9 +17,9 @@ #include "CodeGenModule.h" #include "clang/AST/Decl.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/Support/CallSite.h" #include <vector> using namespace clang; @@ -39,7 +39,7 @@ private: public: CGNVCUDARuntime(CodeGenModule &CGM); - void EmitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args); + void EmitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args) override; }; } diff --git a/lib/CodeGen/CGCUDARuntime.cpp b/lib/CodeGen/CGCUDARuntime.cpp index eaf31bb6f67f..29e0a91a63fe 100644 --- a/lib/CodeGen/CGCUDARuntime.cpp +++ b/lib/CodeGen/CGCUDARuntime.cpp @@ -31,12 +31,13 @@ RValue CGCUDARuntime::EmitCUDAKernelCallExpr(CodeGenFunction &CGF, llvm::BasicBlock *ContBlock = CGF.createBasicBlock("kcall.end"); CodeGenFunction::ConditionalEvaluation eval(CGF); - CGF.EmitBranchOnBoolExpr(E->getConfig(), ContBlock, ConfigOKBlock); + CGF.EmitBranchOnBoolExpr(E->getConfig(), ContBlock, ConfigOKBlock, + /*TrueCount=*/0); eval.begin(CGF); CGF.EmitBlock(ConfigOKBlock); - const Decl *TargetDecl = 0; + const Decl *TargetDecl = nullptr; if (const ImplicitCastExpr *CE = dyn_cast<ImplicitCastExpr>(E->getCallee())) { if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(CE->getSubExpr())) { TargetDecl = DRE->getDecl(); @@ -51,5 +52,5 @@ RValue CGCUDARuntime::EmitCUDAKernelCallExpr(CodeGenFunction &CGF, CGF.EmitBlock(ContBlock); eval.end(CGF); - 
return RValue::get(0); + return RValue::get(nullptr); } diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp index 2f8a17afbcd1..545c5ef9f827 100644 --- a/lib/CodeGen/CGCXX.cpp +++ b/lib/CodeGen/CGCXX.cpp @@ -35,7 +35,7 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { return true; // Producing an alias to a base class ctor/dtor can degrade debug quality - // as the debugger cannot tell them appart. + // as the debugger cannot tell them apart. if (getCodeGenOpts().OptimizationLevel == 0) return true; @@ -44,6 +44,10 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { if (!D->hasTrivialBody()) return true; + // For exported destructors, we need a full definition. + if (D->hasAttr<DLLExportAttr>()) + return true; + const CXXRecordDecl *Class = D->getParent(); // If we need to manipulate a VTT parameter, give up. @@ -56,22 +60,20 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { // If any field has a non-trivial destructor, we have to emit the // destructor separately. - for (CXXRecordDecl::field_iterator I = Class->field_begin(), - E = Class->field_end(); I != E; ++I) + for (const auto *I : Class->fields()) if (I->getType().isDestructedType()) return true; // Try to find a unique base class with a non-trivial destructor. - const CXXRecordDecl *UniqueBase = 0; - for (CXXRecordDecl::base_class_const_iterator I = Class->bases_begin(), - E = Class->bases_end(); I != E; ++I) { + const CXXRecordDecl *UniqueBase = nullptr; + for (const auto &I : Class->bases()) { // We're in the base destructor, so skip virtual bases. - if (I->isVirtual()) continue; + if (I.isVirtual()) continue; // Skip base classes with trivial destructors. - const CXXRecordDecl *Base - = cast<CXXRecordDecl>(I->getType()->getAs<RecordType>()->getDecl()); + const auto *Base = + cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl()); if (Base->hasTrivialDestructor()) continue; // If we've already found a base class with a non-trivial @@ -113,7 +115,7 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl, if (!getCodeGenOpts().CXXCtorDtorAliases) return true; - // The alias will use the linkage of the referrent. If we can't + // The alias will use the linkage of the referent. If we can't // support aliases with that linkage, fail. llvm::GlobalValue::LinkageTypes Linkage = getFunctionLinkage(AliasDecl); @@ -136,20 +138,20 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl, llvm::PointerType *AliasType = getTypes().GetFunctionType(AliasDecl)->getPointerTo(); - // Find the referrent. Some aliases might require a bitcast, in + // Find the referent. Some aliases might require a bitcast, in // which case the caller is responsible for ensuring the soundness // of these semantics. - llvm::GlobalValue *Ref = cast<llvm::GlobalValue>(GetAddrOfGlobal(TargetDecl)); + auto *Ref = cast<llvm::GlobalValue>(GetAddrOfGlobal(TargetDecl)); llvm::Constant *Aliasee = Ref; if (Ref->getType() != AliasType) Aliasee = llvm::ConstantExpr::getBitCast(Ref, AliasType); // Instead of creating as alias to a linkonce_odr, replace all of the uses - // of the aliassee. + // of the aliasee. 
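// The situation being optimised here, sketched on a concrete type (Itanium ABI
// mangled names shown; the example class is invented for illustration):
//
//   struct Base    { ~Base(); };
//   struct Derived : Base { ~Derived() {} };   // no virtual bases, trivial body
//
// Only the base-object destructor _ZN7DerivedD2Ev needs real code. The
// complete-object destructor _ZN7DerivedD1Ev does identical work, so when the
// linkage allows it the complete variant can be emitted as an alias to the
// base variant instead of as a second copy of the same body.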
if (llvm::GlobalValue::isDiscardableIfUnused(Linkage) && (TargetLinkage != llvm::GlobalValue::AvailableExternallyLinkage || !TargetDecl.getDecl()->hasAttr<AlwaysInlineAttr>())) { - // FIXME: An extern template instanciation will create functions with + // FIXME: An extern template instantiation will create functions with // linkage "AvailableExternally". In libc++, some classes also define // members with attribute "AlwaysInline" and expect no reference to // be generated. It is desirable to reenable this optimisation after @@ -174,8 +176,8 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl, return true; // Create the alias with no name. - llvm::GlobalAlias *Alias = - new llvm::GlobalAlias(AliasType, Linkage, "", Aliasee, &getModule()); + auto *Alias = llvm::GlobalAlias::create(AliasType->getElementType(), 0, + Linkage, "", Aliasee, &getModule()); // Switch any previous uses to the alias. if (Entry) { @@ -196,11 +198,13 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl, void CodeGenModule::EmitCXXConstructor(const CXXConstructorDecl *ctor, CXXCtorType ctorType) { - // The complete constructor is equivalent to the base constructor - // for classes with no virtual bases. Try to emit it as an alias. - if (getTarget().getCXXABI().hasConstructorVariants() && - !ctor->getParent()->getNumVBases() && - (ctorType == Ctor_Complete || ctorType == Ctor_Base)) { + if (!getTarget().getCXXABI().hasConstructorVariants()) { + // If there are no constructor variants, always emit the complete destructor. + ctorType = Ctor_Complete; + } else if (!ctor->getParent()->getNumVBases() && + (ctorType == Ctor_Complete || ctorType == Ctor_Base)) { + // The complete constructor is equivalent to the base constructor + // for classes with no virtual bases. Try to emit it as an alias. 
bool ProducedAlias = !TryEmitDefinitionAsAlias(GlobalDecl(ctor, Ctor_Complete), GlobalDecl(ctor, Ctor_Base), true); @@ -211,20 +215,21 @@ void CodeGenModule::EmitCXXConstructor(const CXXConstructorDecl *ctor, const CGFunctionInfo &fnInfo = getTypes().arrangeCXXConstructorDeclaration(ctor, ctorType); - llvm::Function *fn = - cast<llvm::Function>(GetAddrOfCXXConstructor(ctor, ctorType, &fnInfo)); + auto *fn = cast<llvm::Function>( + GetAddrOfCXXConstructor(ctor, ctorType, &fnInfo, true)); setFunctionLinkage(GlobalDecl(ctor, ctorType), fn); CodeGenFunction(*this).GenerateCode(GlobalDecl(ctor, ctorType), fn, fnInfo); - SetFunctionDefinitionAttributes(ctor, fn); + setFunctionDefinitionAttributes(ctor, fn); SetLLVMFunctionAttributesForDefinition(ctor, fn); } llvm::GlobalValue * CodeGenModule::GetAddrOfCXXConstructor(const CXXConstructorDecl *ctor, CXXCtorType ctorType, - const CGFunctionInfo *fnInfo) { + const CGFunctionInfo *fnInfo, + bool DontDefer) { GlobalDecl GD(ctor, ctorType); StringRef name = getMangledName(GD); @@ -236,7 +241,8 @@ CodeGenModule::GetAddrOfCXXConstructor(const CXXConstructorDecl *ctor, llvm::FunctionType *fnType = getTypes().GetFunctionType(*fnInfo); return cast<llvm::Function>(GetOrCreateLLVMFunction(name, fnType, GD, - /*ForVTable=*/false)); + /*ForVTable=*/false, + DontDefer)); } void CodeGenModule::EmitCXXDestructor(const CXXDestructorDecl *dtor, @@ -266,13 +272,13 @@ void CodeGenModule::EmitCXXDestructor(const CXXDestructorDecl *dtor, const CGFunctionInfo &fnInfo = getTypes().arrangeCXXDestructor(dtor, dtorType); - llvm::Function *fn = - cast<llvm::Function>(GetAddrOfCXXDestructor(dtor, dtorType, &fnInfo)); + auto *fn = cast<llvm::Function>( + GetAddrOfCXXDestructor(dtor, dtorType, &fnInfo, nullptr, true)); setFunctionLinkage(GlobalDecl(dtor, dtorType), fn); CodeGenFunction(*this).GenerateCode(GlobalDecl(dtor, dtorType), fn, fnInfo); - SetFunctionDefinitionAttributes(dtor, fn); + setFunctionDefinitionAttributes(dtor, fn); SetLLVMFunctionAttributesForDefinition(dtor, fn); } @@ -280,7 +286,8 @@ llvm::GlobalValue * CodeGenModule::GetAddrOfCXXDestructor(const CXXDestructorDecl *dtor, CXXDtorType dtorType, const CGFunctionInfo *fnInfo, - llvm::FunctionType *fnType) { + llvm::FunctionType *fnType, + bool DontDefer) { GlobalDecl GD(dtor, dtorType); StringRef name = getMangledName(GD); @@ -292,7 +299,8 @@ CodeGenModule::GetAddrOfCXXDestructor(const CXXDestructorDecl *dtor, fnType = getTypes().GetFunctionType(*fnInfo); } return cast<llvm::Function>(GetOrCreateLLVMFunction(name, fnType, GD, - /*ForVTable=*/false)); + /*ForVTable=*/false, + DontDefer)); } static llvm::Value *BuildAppleKextVirtualCall(CodeGenFunction &CGF, @@ -331,9 +339,9 @@ CodeGenFunction::BuildAppleKextVirtualCall(const CXXMethodDecl *MD, QualType T = QualType(QTy, 0); const RecordType *RT = T->getAs<RecordType>(); assert(RT && "BuildAppleKextVirtualCall - Qual type must be record"); - const CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl()); - - if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(MD)) + const auto *RD = cast<CXXRecordDecl>(RT->getDecl()); + + if (const auto *DD = dyn_cast<CXXDestructorDecl>(MD)) return BuildAppleKextVirtualDestructorCall(DD, Dtor_Complete, RD); return ::BuildAppleKextVirtualCall(*this, MD, Ty, RD); @@ -346,7 +354,7 @@ CodeGenFunction::BuildAppleKextVirtualDestructorCall( const CXXDestructorDecl *DD, CXXDtorType Type, const CXXRecordDecl *RD) { - const CXXMethodDecl *MD = cast<CXXMethodDecl>(DD); + const auto *MD = cast<CXXMethodDecl>(DD); // FIXME. 
Dtor_Base dtor is always direct!! // It need be somehow inline expanded into the caller. // -O does that. But need to support -O0 as well. @@ -357,5 +365,5 @@ CodeGenFunction::BuildAppleKextVirtualDestructorCall( llvm::Type *Ty = CGM.getTypes().GetFunctionType(FInfo); return ::BuildAppleKextVirtualCall(*this, GlobalDecl(DD, Type), Ty, RD); } - return 0; + return nullptr; } diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp index 412b27814ac8..55ddd666c490 100644 --- a/lib/CodeGen/CGCXXABI.cpp +++ b/lib/CodeGen/CGCXXABI.cpp @@ -28,6 +28,41 @@ void CGCXXABI::ErrorUnsupportedABI(CodeGenFunction &CGF, StringRef S) { << S; } +bool CGCXXABI::canCopyArgument(const CXXRecordDecl *RD) const { + // If RD has a non-trivial move or copy constructor, we cannot copy the + // argument. + if (RD->hasNonTrivialCopyConstructor() || RD->hasNonTrivialMoveConstructor()) + return false; + + // If RD has a non-trivial destructor, we cannot copy the argument. + if (RD->hasNonTrivialDestructor()) + return false; + + // We can only copy the argument if there exists at least one trivial, + // non-deleted copy or move constructor. + // FIXME: This assumes that all lazily declared copy and move constructors are + // not deleted. This assumption might not be true in some corner cases. + bool CopyDeleted = false; + bool MoveDeleted = false; + for (const CXXConstructorDecl *CD : RD->ctors()) { + if (CD->isCopyConstructor() || CD->isMoveConstructor()) { + assert(CD->isTrivial()); + // We had at least one undeleted trivial copy or move ctor. Return + // directly. + if (!CD->isDeleted()) + return true; + if (CD->isCopyConstructor()) + CopyDeleted = true; + else + MoveDeleted = true; + } + } + + // If all trivial copy and move constructors are deleted, we cannot copy the + // argument. 
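// Concretely (example types invented for illustration):
//
//   struct Plain      { int x; };                          // no special members declared
//   struct HasDtor    { ~HasDtor(); int x; };              // non-trivial destructor
//   struct Uncopyable { Uncopyable(const Uncopyable &) = delete;
//                       Uncopyable(Uncopyable &&) = delete; };
//
// canCopyArgument(Plain) is true: nothing forbids a bitwise copy. HasDtor fails
// the destructor check above. Uncopyable declares only deleted copy/move
// constructors, so it presumably falls out of the loop with both flags set and
// is rejected here. The answer tells the ABI whether an argument object of the
// type may be passed by a plain memory copy.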
+ return !(CopyDeleted && MoveDeleted); +} + llvm::Constant *CGCXXABI::GetBogusMemberPointer(QualType T) { return llvm::Constant::getNullValue(CGM.getTypes().ConvertType(T)); } @@ -37,10 +72,9 @@ CGCXXABI::ConvertMemberPointerType(const MemberPointerType *MPT) { return CGM.getTypes().ConvertType(CGM.getContext().getPointerDiffType()); } -llvm::Value *CGCXXABI::EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF, - llvm::Value *&This, - llvm::Value *MemPtr, - const MemberPointerType *MPT) { +llvm::Value *CGCXXABI::EmitLoadOfMemberFunctionPointer( + CodeGenFunction &CGF, const Expr *E, llvm::Value *&This, + llvm::Value *MemPtr, const MemberPointerType *MPT) { ErrorUnsupportedABI(CGF, "calls through member pointers"); const FunctionProtoType *FPT = @@ -52,10 +86,10 @@ llvm::Value *CGCXXABI::EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF, return llvm::Constant::getNullValue(FTy->getPointerTo()); } -llvm::Value *CGCXXABI::EmitMemberDataPointerAddress(CodeGenFunction &CGF, - llvm::Value *Base, - llvm::Value *MemPtr, - const MemberPointerType *MPT) { +llvm::Value * +CGCXXABI::EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E, + llvm::Value *Base, llvm::Value *MemPtr, + const MemberPointerType *MPT) { ErrorUnsupportedABI(CGF, "loads of member pointers"); llvm::Type *Ty = CGF.ConvertType(MPT->getPointeeType())->getPointerTo(); return llvm::Constant::getNullValue(Ty); @@ -116,13 +150,13 @@ bool CGCXXABI::isZeroInitializable(const MemberPointerType *MPT) { return true; } -void CGCXXABI::BuildThisParam(CodeGenFunction &CGF, FunctionArgList &params) { +void CGCXXABI::buildThisParam(CodeGenFunction &CGF, FunctionArgList &params) { const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl()); // FIXME: I'm not entirely sure I like using a fake decl just for code // generation. Maybe we can come up with a better way? ImplicitParamDecl *ThisDecl - = ImplicitParamDecl::Create(CGM.getContext(), 0, MD->getLocation(), + = ImplicitParamDecl::Create(CGM.getContext(), nullptr, MD->getLocation(), &CGM.getContext().Idents.get("this"), MD->getThisType(CGM.getContext())); params.push_back(ThisDecl); @@ -160,7 +194,7 @@ llvm::Value *CGCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, QualType ElementType) { // Should never be called. ErrorUnsupportedABI(CGF, "array cookie initialization"); - return 0; + return nullptr; } bool CGCXXABI::requiresArrayCookie(const CXXDeleteExpr *expr, @@ -194,7 +228,7 @@ void CGCXXABI::ReadArrayCookie(CodeGenFunction &CGF, llvm::Value *ptr, // If we don't need an array cookie, bail out early. 
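// Background on the cookie being read here (Itanium-style layout, sketched;
// the example type is invented for illustration):
//
//   struct Widget { ~Widget(); };
//   Widget *w = new Widget[n];
//
//   allocation returned by operator new[]:
//     [ size_t cookie = n ][ Widget #0 ][ Widget #1 ] ...
//                           ^-- 'w' points here
//
// 'delete[] w' reads the cookie back to learn how many destructors to run and
// to recover the pointer originally returned by operator new[]. A type with a
// trivial destructor (and no deallocation function that wants the size) needs
// no cookie, which is the early exit below.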
if (!requiresArrayCookie(expr, eltTy)) { allocPtr = ptr; - numElements = 0; + numElements = nullptr; cookieSize = CharUnits::Zero(); return; } @@ -273,16 +307,17 @@ CGCXXABI::EmitCtorCompleteObjectHandler(CodeGenFunction &CGF, llvm_unreachable("shouldn't be called in this ABI"); ErrorUnsupportedABI(CGF, "complete object detection in ctor"); - return 0; + return nullptr; } void CGCXXABI::EmitThreadLocalInitFuncs( - llvm::ArrayRef<std::pair<const VarDecl *, llvm::GlobalVariable *> > Decls, + ArrayRef<std::pair<const VarDecl *, llvm::GlobalVariable *> > Decls, llvm::Function *InitFunc) { } -LValue CGCXXABI::EmitThreadLocalDeclRefExpr(CodeGenFunction &CGF, - const DeclRefExpr *DRE) { +LValue CGCXXABI::EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, + const VarDecl *VD, + QualType LValType) { ErrorUnsupportedABI(CGF, "odr-use of thread_local global"); return LValue(); } diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h index 9e9a2a7aaf9b..91e49707bae6 100644 --- a/lib/CodeGen/CGCXXABI.h +++ b/lib/CodeGen/CGCXXABI.h @@ -19,29 +19,29 @@ #include "clang/Basic/LLVM.h" namespace llvm { - class Constant; - class Type; - class Value; +class Constant; +class Type; +class Value; } namespace clang { - class CastExpr; - class CXXConstructorDecl; - class CXXDestructorDecl; - class CXXMethodDecl; - class CXXRecordDecl; - class FieldDecl; - class MangleContext; +class CastExpr; +class CXXConstructorDecl; +class CXXDestructorDecl; +class CXXMethodDecl; +class CXXRecordDecl; +class FieldDecl; +class MangleContext; namespace CodeGen { - class CodeGenFunction; - class CodeGenModule; +class CodeGenFunction; +class CodeGenModule; /// \brief Implements C++ ABI-specific code generation functions. class CGCXXABI { protected: CodeGenModule &CGM; - OwningPtr<MangleContext> MangleCtx; + std::unique_ptr<MangleContext> MangleCtx; CGCXXABI(CodeGenModule &CGM) : CGM(CGM), MangleCtx(CGM.getContext().createMangleContext()) {} @@ -60,15 +60,6 @@ protected: /// Get a null value for unsupported member pointers. llvm::Constant *GetBogusMemberPointer(QualType T); - // FIXME: Every place that calls getVTT{Decl,Value} is something - // that needs to be abstracted properly. - ImplicitParamDecl *&getVTTDecl(CodeGenFunction &CGF) { - return CGF.CXXStructorImplicitParamDecl; - } - llvm::Value *&getVTTValue(CodeGenFunction &CGF) { - return CGF.CXXStructorImplicitParamValue; - } - ImplicitParamDecl *&getStructorImplicitParamDecl(CodeGenFunction &CGF) { return CGF.CXXStructorImplicitParamDecl; } @@ -76,11 +67,8 @@ protected: return CGF.CXXStructorImplicitParamValue; } - /// Build a parameter variable suitable for 'this'. - void BuildThisParam(CodeGenFunction &CGF, FunctionArgList &Params); - /// Perform prolog initialization of the parameter variable suitable - /// for 'this' emitted by BuildThisParam. + /// for 'this' emitted by buildThisParam. void EmitThisParam(CodeGenFunction &CGF); ASTContext &getContext() const { return CGM.getContext(); } @@ -105,8 +93,9 @@ public: /// when called virtually, and code generation does not support the case. virtual bool HasThisReturn(GlobalDecl GD) const { return false; } - /// Returns true if the given record type should be returned indirectly. - virtual bool isReturnTypeIndirect(const CXXRecordDecl *RD) const = 0; + /// If the C++ ABI requires the given type be returned in a particular way, + /// this method sets RetAI and returns true. + virtual bool classifyReturnType(CGFunctionInfo &FI) const = 0; /// Specify how one should pass an argument of a record type. 
enum RecordArgABI { @@ -123,9 +112,17 @@ public: RAA_Indirect }; + /// Returns true if C++ allows us to copy the memory of an object of type RD + /// when it is passed as an argument. + bool canCopyArgument(const CXXRecordDecl *RD) const; + /// Returns how an argument of the given record type should be passed. virtual RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const = 0; + /// Returns true if the implicit 'sret' parameter comes after the implicit + /// 'this' parameter of C++ instance methods. + virtual bool isSRetParameterAfterThis() const { return false; } + /// Find the LLVM type used to represent the given member pointer /// type. virtual llvm::Type * @@ -134,17 +131,15 @@ public: /// Load a member function from an object and a member function /// pointer. Apply the this-adjustment and set 'This' to the /// adjusted value. - virtual llvm::Value * - EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF, - llvm::Value *&This, - llvm::Value *MemPtr, - const MemberPointerType *MPT); + virtual llvm::Value *EmitLoadOfMemberFunctionPointer( + CodeGenFunction &CGF, const Expr *E, llvm::Value *&This, + llvm::Value *MemPtr, const MemberPointerType *MPT); /// Calculate an l-value from an object and a data member pointer. - virtual llvm::Value *EmitMemberDataPointerAddress(CodeGenFunction &CGF, - llvm::Value *Base, - llvm::Value *MemPtr, - const MemberPointerType *MPT); + virtual llvm::Value * + EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E, + llvm::Value *Base, llvm::Value *MemPtr, + const MemberPointerType *MPT); /// Perform a derived-to-base, base-to-derived, or bitcast member /// pointer conversion. @@ -212,6 +207,30 @@ public: llvm::Value *ptr, QualType type) = 0; + virtual llvm::Constant *getAddrOfRTTIDescriptor(QualType Ty) = 0; + + virtual bool shouldTypeidBeNullChecked(bool IsDeref, + QualType SrcRecordTy) = 0; + virtual void EmitBadTypeidCall(CodeGenFunction &CGF) = 0; + virtual llvm::Value *EmitTypeid(CodeGenFunction &CGF, QualType SrcRecordTy, + llvm::Value *ThisPtr, + llvm::Type *StdTypeInfoPtrTy) = 0; + + virtual bool shouldDynamicCastCallBeNullChecked(bool SrcIsPtr, + QualType SrcRecordTy) = 0; + + virtual llvm::Value * + EmitDynamicCastCall(CodeGenFunction &CGF, llvm::Value *Value, + QualType SrcRecordTy, QualType DestTy, + QualType DestRecordTy, llvm::BasicBlock *CastEnd) = 0; + + virtual llvm::Value *EmitDynamicCastToVoid(CodeGenFunction &CGF, + llvm::Value *Value, + QualType SrcRecordTy, + QualType DestTy) = 0; + + virtual bool EmitBadCastCall(CodeGenFunction &CGF) = 0; + virtual llvm::Value *GetVirtualBaseClassOffset(CodeGenFunction &CGF, llvm::Value *This, const CXXRecordDecl *ClassDecl, @@ -272,23 +291,27 @@ public: } /// Perform ABI-specific "this" argument adjustment required prior to - /// a virtual function call. - virtual llvm::Value *adjustThisArgumentForVirtualCall(CodeGenFunction &CGF, - GlobalDecl GD, - llvm::Value *This) { + /// a call of a virtual function. + /// The "VirtualCall" argument is true iff the call itself is virtual. + virtual llvm::Value * + adjustThisArgumentForVirtualFunctionCall(CodeGenFunction &CGF, GlobalDecl GD, + llvm::Value *This, + bool VirtualCall) { return This; } - /// Build the ABI-specific portion of the parameter list for a - /// function. This generally involves a 'this' parameter and - /// possibly some extra data for constructors and destructors. + /// Build a parameter variable suitable for 'this'. 
+ void buildThisParam(CodeGenFunction &CGF, FunctionArgList &Params); + + /// Insert any ABI-specific implicit parameters into the parameter list for a + /// function. This generally involves extra data for constructors and + /// destructors. /// /// ABIs may also choose to override the return type, which has been /// initialized with the type of 'this' if HasThisReturn(CGF.CurGD) is true or /// the formal return type of the function otherwise. - virtual void BuildInstanceFunctionParams(CodeGenFunction &CGF, - QualType &ResTy, - FunctionArgList &Params) = 0; + virtual void addImplicitStructorParams(CodeGenFunction &CGF, QualType &ResTy, + FunctionArgList &Params) = 0; /// Perform ABI-specific "this" parameter adjustment in a virtual function /// prologue. @@ -300,14 +323,20 @@ public: /// Emit the ABI-specific prolog for the function. virtual void EmitInstanceFunctionProlog(CodeGenFunction &CGF) = 0; - /// Emit the constructor call. Return the function that is called. - virtual void EmitConstructorCall(CodeGenFunction &CGF, - const CXXConstructorDecl *D, - CXXCtorType Type, - bool ForVirtualBase, bool Delegating, - llvm::Value *This, - CallExpr::const_arg_iterator ArgBeg, - CallExpr::const_arg_iterator ArgEnd) = 0; + /// Add any ABI-specific implicit arguments needed to call a constructor. + /// + /// \return The number of args added to the call, which is typically zero or + /// one. + virtual unsigned + addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D, + CXXCtorType Type, bool ForVirtualBase, + bool Delegating, CallArgList &Args) = 0; + + /// Emit the destructor call. + virtual void EmitDestructorCall(CodeGenFunction &CGF, + const CXXDestructorDecl *DD, CXXDtorType Type, + bool ForVirtualBase, bool Delegating, + llvm::Value *This) = 0; /// Emits the VTable definitions required for the given record type. virtual void emitVTableDefinitions(CodeGenVTables &CGVT, @@ -354,7 +383,8 @@ public: /// base tables. virtual void emitVirtualInheritanceTables(const CXXRecordDecl *RD) = 0; - virtual void setThunkLinkage(llvm::Function *Thunk, bool ForVTable) = 0; + virtual void setThunkLinkage(llvm::Function *Thunk, bool ForVTable, + GlobalDecl GD, bool ReturnAdjustment) = 0; virtual llvm::Value *performThisAdjustment(CodeGenFunction &CGF, llvm::Value *This, @@ -373,10 +403,6 @@ public: /// Gets the deleted virtual member call name. virtual StringRef GetDeletedVirtualCallName() = 0; - /// \brief Returns true iff static data members that are initialized in the - /// class definition should have linkonce linkage. - virtual bool isInlineInitializedStaticDataMemberLinkOnce() { return false; } - /**************************** Array cookies ******************************/ /// Returns the extra size required in order to store the array @@ -479,14 +505,15 @@ public: /// initialization or non-trivial destruction for thread_local /// variables, a function to perform the initialization. Otherwise, 0. virtual void EmitThreadLocalInitFuncs( - llvm::ArrayRef<std::pair<const VarDecl *, llvm::GlobalVariable *> > Decls, + ArrayRef<std::pair<const VarDecl *, llvm::GlobalVariable *> > Decls, llvm::Function *InitFunc); /// Emit a reference to a non-local thread_local variable (including /// triggering the initialization of all thread_local variables in its /// translation unit). 
- virtual LValue EmitThreadLocalDeclRefExpr(CodeGenFunction &CGF, - const DeclRefExpr *DRE); + virtual LValue EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, + const VarDecl *VD, + QualType LValType); }; // Create an instance of a C++ ABI class: diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index 22f2467021e1..17c3354f93e9 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -26,10 +26,10 @@ #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" -#include "llvm/MC/SubtargetFeature.h" -#include "llvm/Support/CallSite.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Transforms/Utils/Local.h" using namespace clang; using namespace CodeGen; @@ -79,23 +79,26 @@ const CGFunctionInfo & CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> FTNP) { // When translating an unprototyped function type, always use a // variadic type. - return arrangeLLVMFunctionInfo(FTNP->getResultType().getUnqualifiedType(), - None, FTNP->getExtInfo(), RequiredArgs(0)); + return arrangeLLVMFunctionInfo(FTNP->getReturnType().getUnqualifiedType(), + false, None, FTNP->getExtInfo(), + RequiredArgs(0)); } /// Arrange the LLVM function layout for a value of the given function /// type, on top of any implicit parameters already stored. Use the /// given ExtInfo instead of the ExtInfo from the function type. static const CGFunctionInfo &arrangeLLVMFunctionInfo(CodeGenTypes &CGT, + bool IsInstanceMethod, SmallVectorImpl<CanQualType> &prefix, CanQual<FunctionProtoType> FTP, FunctionType::ExtInfo extInfo) { RequiredArgs required = RequiredArgs::forPrototypePlus(FTP, prefix.size()); // FIXME: Kill copy. - for (unsigned i = 0, e = FTP->getNumArgs(); i != e; ++i) - prefix.push_back(FTP->getArgType(i)); - CanQualType resultType = FTP->getResultType().getUnqualifiedType(); - return CGT.arrangeLLVMFunctionInfo(resultType, prefix, extInfo, required); + for (unsigned i = 0, e = FTP->getNumParams(); i != e; ++i) + prefix.push_back(FTP->getParamType(i)); + CanQualType resultType = FTP->getReturnType().getUnqualifiedType(); + return CGT.arrangeLLVMFunctionInfo(resultType, IsInstanceMethod, prefix, + extInfo, required); } /// Arrange the argument and result information for a free function (i.e. @@ -103,7 +106,7 @@ static const CGFunctionInfo &arrangeLLVMFunctionInfo(CodeGenTypes &CGT, static const CGFunctionInfo &arrangeFreeFunctionType(CodeGenTypes &CGT, SmallVectorImpl<CanQualType> &prefix, CanQual<FunctionProtoType> FTP) { - return arrangeLLVMFunctionInfo(CGT, prefix, FTP, FTP->getExtInfo()); + return arrangeLLVMFunctionInfo(CGT, false, prefix, FTP, FTP->getExtInfo()); } /// Arrange the argument and result information for a free function (i.e. 
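The EmitThreadLocalVarDeclLValue hook renamed in the CGCXXABI change above is what emitted code goes through when a thread_local variable with non-trivial initialization is referenced. A small sketch of the source pattern involved (example only; under the Itanium ABI the access is routed through a per-variable thread wrapper that runs the initializer on the first use in each thread):

#include <string>

thread_local std::string tls_name("worker");

const char *current_name() {
  // Not a plain load: the reference to tls_name is emitted as a call to its
  // thread wrapper, which performs any deferred initialization and returns the
  // variable's address for the calling thread.
  return tls_name.c_str();
}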
@@ -112,7 +115,7 @@ static const CGFunctionInfo &arrangeCXXMethodType(CodeGenTypes &CGT, SmallVectorImpl<CanQualType> &prefix, CanQual<FunctionProtoType> FTP) { FunctionType::ExtInfo extInfo = FTP->getExtInfo(); - return arrangeLLVMFunctionInfo(CGT, prefix, FTP, extInfo); + return arrangeLLVMFunctionInfo(CGT, true, prefix, FTP, extInfo); } /// Arrange the argument and result information for a value of the @@ -123,7 +126,7 @@ CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionProtoType> FTP) { return ::arrangeFreeFunctionType(*this, argTypes, FTP); } -static CallingConv getCallingConventionForDecl(const Decl *D) { +static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) { // Set the appropriate calling convention for the Function. if (D->hasAttr<StdCallAttr>()) return CC_X86StdCall; @@ -146,9 +149,32 @@ static CallingConv getCallingConventionForDecl(const Decl *D) { if (D->hasAttr<IntelOclBiccAttr>()) return CC_IntelOclBicc; + if (D->hasAttr<MSABIAttr>()) + return IsWindows ? CC_C : CC_X86_64Win64; + + if (D->hasAttr<SysVABIAttr>()) + return IsWindows ? CC_X86_64SysV : CC_C; + return CC_C; } +static bool isAAPCSVFP(const CGFunctionInfo &FI, const TargetInfo &Target) { + switch (FI.getEffectiveCallingConvention()) { + case llvm::CallingConv::C: + switch (Target.getTriple().getEnvironment()) { + case llvm::Triple::EABIHF: + case llvm::Triple::GNUEABIHF: + return true; + default: + return false; + } + case llvm::CallingConv::ARM_AAPCS_VFP: + return true; + default: + return false; + } +} + /// Arrange the argument and result information for a call to an /// unknown C++ non-static member function of the given abstract type. /// (Zero value of RD means we don't have any meaningful "this" argument type, @@ -202,18 +228,41 @@ CodeGenTypes::arrangeCXXConstructorDeclaration(const CXXConstructorDecl *D, CanQualType resultType = TheCXXABI.HasThisReturn(GD) ? argTypes.front() : Context.VoidTy; - TheCXXABI.BuildConstructorSignature(D, ctorKind, resultType, argTypes); - CanQual<FunctionProtoType> FTP = GetFormalType(D); - RequiredArgs required = RequiredArgs::forPrototypePlus(FTP, argTypes.size()); - // Add the formal parameters. - for (unsigned i = 0, e = FTP->getNumArgs(); i != e; ++i) - argTypes.push_back(FTP->getArgType(i)); + for (unsigned i = 0, e = FTP->getNumParams(); i != e; ++i) + argTypes.push_back(FTP->getParamType(i)); + + TheCXXABI.BuildConstructorSignature(D, ctorKind, resultType, argTypes); + + RequiredArgs required = + (D->isVariadic() ? RequiredArgs(argTypes.size()) : RequiredArgs::All); FunctionType::ExtInfo extInfo = FTP->getExtInfo(); - return arrangeLLVMFunctionInfo(resultType, argTypes, extInfo, required); + return arrangeLLVMFunctionInfo(resultType, true, argTypes, extInfo, required); +} + +/// Arrange a call to a C++ method, passing the given arguments. +const CGFunctionInfo & +CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args, + const CXXConstructorDecl *D, + CXXCtorType CtorKind, + unsigned ExtraArgs) { + // FIXME: Kill copy. + SmallVector<CanQualType, 16> ArgTypes; + for (CallArgList::const_iterator i = args.begin(), e = args.end(); i != e; + ++i) + ArgTypes.push_back(Context.getCanonicalParamType(i->Ty)); + + CanQual<FunctionProtoType> FPT = GetFormalType(D); + RequiredArgs Required = RequiredArgs::forPrototypePlus(FPT, 1 + ExtraArgs); + GlobalDecl GD(D, CtorKind); + CanQualType ResultType = + TheCXXABI.HasThisReturn(GD) ? 
ArgTypes.front() : Context.VoidTy; + + FunctionType::ExtInfo Info = FPT->getExtInfo(); + return arrangeLLVMFunctionInfo(ResultType, true, ArgTypes, Info, Required); } /// Arrange the argument and result information for a declaration, @@ -232,11 +281,11 @@ CodeGenTypes::arrangeCXXDestructor(const CXXDestructorDecl *D, TheCXXABI.BuildDestructorSignature(D, dtorKind, resultType, argTypes); CanQual<FunctionProtoType> FTP = GetFormalType(D); - assert(FTP->getNumArgs() == 0 && "dtor with formal parameters"); + assert(FTP->getNumParams() == 0 && "dtor with formal parameters"); assert(FTP->isVariadic() == 0 && "dtor with formal parameters"); FunctionType::ExtInfo extInfo = FTP->getExtInfo(); - return arrangeLLVMFunctionInfo(resultType, argTypes, extInfo, + return arrangeLLVMFunctionInfo(resultType, true, argTypes, extInfo, RequiredArgs::All); } @@ -256,7 +305,7 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) { // non-variadic type. if (isa<FunctionNoProtoType>(FTy)) { CanQual<FunctionNoProtoType> noProto = FTy.getAs<FunctionNoProtoType>(); - return arrangeLLVMFunctionInfo(noProto->getResultType(), None, + return arrangeLLVMFunctionInfo(noProto->getReturnType(), false, None, noProto->getExtInfo(), RequiredArgs::All); } @@ -286,13 +335,13 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, argTys.push_back(Context.getCanonicalParamType(receiverType)); argTys.push_back(Context.getCanonicalParamType(Context.getObjCSelType())); // FIXME: Kill copy? - for (ObjCMethodDecl::param_const_iterator i = MD->param_begin(), - e = MD->param_end(); i != e; ++i) { - argTys.push_back(Context.getCanonicalParamType((*i)->getType())); + for (const auto *I : MD->params()) { + argTys.push_back(Context.getCanonicalParamType(I->getType())); } FunctionType::ExtInfo einfo; - einfo = einfo.withCallingConv(getCallingConventionForDecl(MD)); + bool IsWindows = getContext().getTargetInfo().getTriple().isOSWindows(); + einfo = einfo.withCallingConv(getCallingConventionForDecl(MD, IsWindows)); if (getContext().getLangOpts().ObjCAutoRefCount && MD->hasAttr<NSReturnsRetainedAttr>()) @@ -301,8 +350,8 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, RequiredArgs required = (MD->isVariadic() ? RequiredArgs(argTys.size()) : RequiredArgs::All); - return arrangeLLVMFunctionInfo(GetReturnType(MD->getResultType()), argTys, - einfo, required); + return arrangeLLVMFunctionInfo(GetReturnType(MD->getReturnType()), false, + argTys, einfo, required); } const CGFunctionInfo & @@ -336,7 +385,7 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT, // extra prefix plus the arguments in the prototype. 
if (const FunctionProtoType *proto = dyn_cast<FunctionProtoType>(fnType)) { if (proto->isVariadic()) - required = RequiredArgs(proto->getNumArgs() + numExtraRequiredArgs); + required = RequiredArgs(proto->getNumParams() + numExtraRequiredArgs); // If we don't have a prototype at all, but we're supposed to // explicitly use the variadic convention for unprototyped calls, @@ -348,7 +397,7 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT, required = RequiredArgs(args.size()); } - return CGT.arrangeFreeFunctionCall(fnType->getResultType(), args, + return CGT.arrangeFreeFunctionCall(fnType->getReturnType(), args, fnType->getExtInfo(), required); } @@ -380,8 +429,8 @@ CodeGenTypes::arrangeFreeFunctionCall(QualType resultType, for (CallArgList::const_iterator i = args.begin(), e = args.end(); i != e; ++i) argTypes.push_back(Context.getCanonicalParamType(i->Ty)); - return arrangeLLVMFunctionInfo(GetReturnType(resultType), argTypes, info, - required); + return arrangeLLVMFunctionInfo(GetReturnType(resultType), false, argTypes, + info, required); } /// Arrange a call to a C++ method, passing the given arguments. @@ -396,15 +445,13 @@ CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args, argTypes.push_back(Context.getCanonicalParamType(i->Ty)); FunctionType::ExtInfo info = FPT->getExtInfo(); - return arrangeLLVMFunctionInfo(GetReturnType(FPT->getResultType()), + return arrangeLLVMFunctionInfo(GetReturnType(FPT->getReturnType()), true, argTypes, info, required); } -const CGFunctionInfo & -CodeGenTypes::arrangeFunctionDeclaration(QualType resultType, - const FunctionArgList &args, - const FunctionType::ExtInfo &info, - bool isVariadic) { +const CGFunctionInfo &CodeGenTypes::arrangeFreeFunctionDeclaration( + QualType resultType, const FunctionArgList &args, + const FunctionType::ExtInfo &info, bool isVariadic) { // FIXME: Kill copy. SmallVector<CanQualType, 16> argTypes; for (FunctionArgList::const_iterator i = args.begin(), e = args.end(); @@ -413,12 +460,12 @@ CodeGenTypes::arrangeFunctionDeclaration(QualType resultType, RequiredArgs required = (isVariadic ? RequiredArgs(args.size()) : RequiredArgs::All); - return arrangeLLVMFunctionInfo(GetReturnType(resultType), argTypes, info, + return arrangeLLVMFunctionInfo(GetReturnType(resultType), false, argTypes, info, required); } const CGFunctionInfo &CodeGenTypes::arrangeNullaryFunction() { - return arrangeLLVMFunctionInfo(getContext().VoidTy, None, + return arrangeLLVMFunctionInfo(getContext().VoidTy, false, None, FunctionType::ExtInfo(), RequiredArgs::All); } @@ -427,6 +474,7 @@ const CGFunctionInfo &CodeGenTypes::arrangeNullaryFunction() { /// above functions ultimately defer to. const CGFunctionInfo & CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, + bool IsInstanceMethod, ArrayRef<CanQualType> argTypes, FunctionType::ExtInfo info, RequiredArgs required) { @@ -440,15 +488,17 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, // Lookup or create unique function info. llvm::FoldingSetNodeID ID; - CGFunctionInfo::Profile(ID, info, required, resultType, argTypes); + CGFunctionInfo::Profile(ID, IsInstanceMethod, info, required, resultType, + argTypes); - void *insertPos = 0; + void *insertPos = nullptr; CGFunctionInfo *FI = FunctionInfos.FindNodeOrInsertPos(ID, insertPos); if (FI) return *FI; // Construct the function info. We co-allocate the ArgInfos. 
- FI = CGFunctionInfo::create(CC, info, resultType, argTypes, required); + FI = CGFunctionInfo::create(CC, IsInstanceMethod, info, resultType, argTypes, + required); FunctionInfos.InsertNode(FI, insertPos); bool inserted = FunctionsBeingProcessed.insert(FI); (void)inserted; @@ -461,13 +511,12 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, // them are direct or extend without a specified coerce type, specify the // default now. ABIArgInfo &retInfo = FI->getReturnInfo(); - if (retInfo.canHaveCoerceToType() && retInfo.getCoerceToType() == 0) + if (retInfo.canHaveCoerceToType() && retInfo.getCoerceToType() == nullptr) retInfo.setCoerceToType(ConvertType(FI->getReturnType())); - for (CGFunctionInfo::arg_iterator I = FI->arg_begin(), E = FI->arg_end(); - I != E; ++I) - if (I->info.canHaveCoerceToType() && I->info.getCoerceToType() == 0) - I->info.setCoerceToType(ConvertType(I->type)); + for (auto &I : FI->arguments()) + if (I.info.canHaveCoerceToType() && I.info.getCoerceToType() == nullptr) + I.info.setCoerceToType(ConvertType(I.type)); bool erased = FunctionsBeingProcessed.erase(FI); (void)erased; assert(erased && "Not in set?"); @@ -476,6 +525,7 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, } CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, + bool IsInstanceMethod, const FunctionType::ExtInfo &info, CanQualType resultType, ArrayRef<CanQualType> argTypes, @@ -486,11 +536,13 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, FI->CallingConvention = llvmCC; FI->EffectiveCallingConvention = llvmCC; FI->ASTCallingConvention = info.getCC(); + FI->InstanceMethod = IsInstanceMethod; FI->NoReturn = info.getNoReturn(); FI->ReturnsRetained = info.getProducesResult(); FI->Required = required; FI->HasRegParm = info.getHasRegParm(); FI->RegParm = info.getRegParm(); + FI->ArgStruct = nullptr; FI->NumArgs = argTypes.size(); FI->getArgsBuffer()[0].type = resultType; for (unsigned i = 0, e = argTypes.size(); i != e; ++i) @@ -513,12 +565,10 @@ void CodeGenTypes::GetExpandedTypes(QualType type, if (RD->isUnion()) { // Unions can be here only in degenerative cases - all the fields are same // after flattening. Thus we have to use the "largest" field. - const FieldDecl *LargestFD = 0; + const FieldDecl *LargestFD = nullptr; CharUnits UnionSize = CharUnits::Zero(); - for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); - i != e; ++i) { - const FieldDecl *FD = *i; + for (const auto *FD : RD->fields()) { assert(!FD->isBitField() && "Cannot expand structure with bit-field members."); CharUnits FieldSize = getContext().getTypeSizeInChars(FD->getType()); @@ -530,11 +580,10 @@ void CodeGenTypes::GetExpandedTypes(QualType type, if (LargestFD) GetExpandedTypes(LargestFD->getType(), expandedTypes); } else { - for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); - i != e; ++i) { - assert(!i->isBitField() && + for (const auto *I : RD->fields()) { + assert(!I->isBitField() && "Cannot expand structure with bit-field members."); - GetExpandedTypes(i->getType(), expandedTypes); + GetExpandedTypes(I->getType(), expandedTypes); } } } else if (const ComplexType *CT = type->getAs<ComplexType>()) { @@ -564,12 +613,10 @@ CodeGenFunction::ExpandTypeFromArgs(QualType Ty, LValue LV, if (RD->isUnion()) { // Unions can be here only in degenerative cases - all the fields are same // after flattening. Thus we have to use the "largest" field. 
- const FieldDecl *LargestFD = 0; + const FieldDecl *LargestFD = nullptr; CharUnits UnionSize = CharUnits::Zero(); - for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); - i != e; ++i) { - const FieldDecl *FD = *i; + for (const auto *FD : RD->fields()) { assert(!FD->isBitField() && "Cannot expand structure with bit-field members."); CharUnits FieldSize = getContext().getTypeSizeInChars(FD->getType()); @@ -584,9 +631,7 @@ CodeGenFunction::ExpandTypeFromArgs(QualType Ty, LValue LV, AI = ExpandTypeFromArgs(LargestFD->getType(), SubLV, AI); } } else { - for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); - i != e; ++i) { - FieldDecl *FD = *i; + for (const auto *FD : RD->fields()) { QualType FT = FD->getType(); // FIXME: What are the right qualifiers here? @@ -672,8 +717,9 @@ static llvm::Value *CoerceIntOrPtrToIntOrPtr(llvm::Value *Val, if (DL.isBigEndian()) { // Preserve the high bits on big-endian targets. // That is what memory coercion does. - uint64_t SrcSize = DL.getTypeAllocSizeInBits(Val->getType()); - uint64_t DstSize = DL.getTypeAllocSizeInBits(DestIntTy); + uint64_t SrcSize = DL.getTypeSizeInBits(Val->getType()); + uint64_t DstSize = DL.getTypeSizeInBits(DestIntTy); + if (SrcSize > DstSize) { Val = CGF.Builder.CreateLShr(Val, SrcSize - DstSize, "coerce.highbits"); Val = CGF.Builder.CreateTrunc(Val, DestIntTy, "coerce.val.ii"); @@ -850,6 +896,11 @@ bool CodeGenModule::ReturnTypeUsesSRet(const CGFunctionInfo &FI) { return FI.getReturnInfo().isIndirect(); } +bool CodeGenModule::ReturnSlotInterferesWithArgs(const CGFunctionInfo &FI) { + return ReturnTypeUsesSRet(FI) && + getTargetCodeGenInfo().doesReturnSlotInterfereWithArgs(); +} + bool CodeGenModule::ReturnTypeUsesFPRet(QualType ResultType) { if (const BuiltinType *BT = ResultType->getAs<BuiltinType>()) { switch (BT->getKind()) { @@ -889,8 +940,9 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { bool Inserted = FunctionsBeingProcessed.insert(&FI); (void)Inserted; assert(Inserted && "Recursively being processed?"); + bool SwapThisWithSRet = false; SmallVector<llvm::Type*, 8> argTypes; - llvm::Type *resultType = 0; + llvm::Type *resultType = nullptr; const ABIArgInfo &retAI = FI.getReturnInfo(); switch (retAI.getKind()) { @@ -902,6 +954,18 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { resultType = retAI.getCoerceToType(); break; + case ABIArgInfo::InAlloca: + if (retAI.getInAllocaSRet()) { + // sret things on win32 aren't void, they return the sret pointer. 
+ QualType ret = FI.getReturnType(); + llvm::Type *ty = ConvertType(ret); + unsigned addressSpace = Context.getTargetAddressSpace(ret); + resultType = llvm::PointerType::get(ty, addressSpace); + } else { + resultType = llvm::Type::getVoidTy(getLLVMContext()); + } + break; + case ABIArgInfo::Indirect: { assert(!retAI.getIndirectAlign() && "Align unused on indirect return."); resultType = llvm::Type::getVoidTy(getLLVMContext()); @@ -910,6 +974,8 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { llvm::Type *ty = ConvertType(ret); unsigned addressSpace = Context.getTargetAddressSpace(ret); argTypes.push_back(llvm::PointerType::get(ty, addressSpace)); + + SwapThisWithSRet = retAI.isSRetAfterThis(); break; } @@ -934,6 +1000,7 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { switch (argAI.getKind()) { case ABIArgInfo::Ignore: + case ABIArgInfo::InAlloca: break; case ABIArgInfo::Indirect: { @@ -948,8 +1015,11 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { // If the coerce-to type is a first class aggregate, flatten it. Either // way is semantically identical, but fast-isel and the optimizer // generally likes scalar values better than FCAs. + // We cannot do this for functions using the AAPCS calling convention, + // as structures are treated differently by that calling convention. llvm::Type *argType = argAI.getCoerceToType(); - if (llvm::StructType *st = dyn_cast<llvm::StructType>(argType)) { + llvm::StructType *st = dyn_cast<llvm::StructType>(argType); + if (st && !isAAPCSVFP(FI, getTarget())) { for (unsigned i = 0, e = st->getNumElements(); i != e; ++i) argTypes.push_back(st->getElementType(i)); } else { @@ -964,6 +1034,13 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { } } + // Add the inalloca struct as the last parameter type. + if (llvm::StructType *ArgStruct = FI.getArgStruct()) + argTypes.push_back(ArgStruct->getPointerTo()); + + if (SwapThisWithSRet) + std::swap(argTypes[0], argTypes[1]); + bool Erased = FunctionsBeingProcessed.erase(&FI); (void)Erased; assert(Erased && "Not in set?"); @@ -1006,6 +1083,8 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); if (TargetDecl->hasAttr<NoReturnAttr>()) FuncAttrs.addAttribute(llvm::Attribute::NoReturn); + if (TargetDecl->hasAttr<NoDuplicateAttr>()) + FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate); if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) { const FunctionProtoType *FPT = Fn->getType()->getAs<FunctionProtoType>(); @@ -1028,6 +1107,8 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, } if (TargetDecl->hasAttr<MallocAttr>()) RetAttrs.addAttribute(llvm::Attribute::NoAlias); + if (TargetDecl->hasAttr<ReturnsNonNullAttr>()) + RetAttrs.addAttribute(llvm::Attribute::NonNull); } if (CodeGenOpts.OptimizeSize) @@ -1038,6 +1119,9 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, FuncAttrs.addAttribute(llvm::Attribute::NoRedZone); if (CodeGenOpts.NoImplicitFloat) FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat); + if (CodeGenOpts.EnableSegmentedStacks && + !(TargetDecl && TargetDecl->hasAttr<NoSplitStackAttr>())) + FuncAttrs.addAttribute("split-stack"); if (AttrOnCallSite) { // Attributes that should go on the call site only. 
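To make the inalloca handling above concrete: when targeting the 32-bit MSVC C++ ABI, an argument that has to be constructed in place (for example, a class with a non-trivial copy constructor passed by value) cannot be lowered as a simple copy or an ordinary byval slot, so all memory arguments of a call are packed into a single struct that becomes the trailing inalloca parameter added above. A hedged source-level sketch of code that would take this path (names invented for illustration):

// Compile for i686-pc-windows-msvc.
struct Packet {
  Packet(const Packet &other);   // non-trivial copy constructor
  int payload[4];
};

void consume(Packet p);          // 'p' must be constructed in the argument area

void forward(const Packet &p) {
  consume(p);                    // the copy is built directly inside the packed
                                 // argument block that is passed via inalloca
}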
@@ -1074,6 +1158,7 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, QualType RetTy = FI.getReturnType(); unsigned Index = 1; + bool SwapThisWithSRet = false; const ABIArgInfo &RetAI = FI.getReturnInfo(); switch (RetAI.getKind()) { case ABIArgInfo::Extend: @@ -1089,15 +1174,24 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, case ABIArgInfo::Ignore: break; + case ABIArgInfo::InAlloca: { + // inalloca disables readnone and readonly + FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly) + .removeAttribute(llvm::Attribute::ReadNone); + break; + } + case ABIArgInfo::Indirect: { llvm::AttrBuilder SRETAttrs; SRETAttrs.addAttribute(llvm::Attribute::StructRet); if (RetAI.getInReg()) SRETAttrs.addAttribute(llvm::Attribute::InReg); - PAL.push_back(llvm:: - AttributeSet::get(getLLVMContext(), Index, SRETAttrs)); + SwapThisWithSRet = RetAI.isSRetAfterThis(); + PAL.push_back(llvm::AttributeSet::get( + getLLVMContext(), SwapThisWithSRet ? 2 : Index, SRETAttrs)); - ++Index; + if (!SwapThisWithSRet) + ++Index; // sret disables readnone and readonly FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly) .removeAttribute(llvm::Attribute::ReadNone); @@ -1108,18 +1202,31 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, llvm_unreachable("Invalid ABI kind for return argument"); } + if (const auto *RefTy = RetTy->getAs<ReferenceType>()) { + QualType PTy = RefTy->getPointeeType(); + if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) + RetAttrs.addDereferenceableAttr(getContext().getTypeSizeInChars(PTy) + .getQuantity()); + else if (getContext().getTargetAddressSpace(PTy) == 0) + RetAttrs.addAttribute(llvm::Attribute::NonNull); + } + if (RetAttrs.hasAttributes()) PAL.push_back(llvm:: AttributeSet::get(getLLVMContext(), llvm::AttributeSet::ReturnIndex, RetAttrs)); - for (CGFunctionInfo::const_arg_iterator it = FI.arg_begin(), - ie = FI.arg_end(); it != ie; ++it) { - QualType ParamType = it->type; - const ABIArgInfo &AI = it->info; + for (const auto &I : FI.arguments()) { + QualType ParamType = I.type; + const ABIArgInfo &AI = I.info; llvm::AttrBuilder Attrs; + // Skip over the sret parameter when it comes second. We already handled it + // above. + if (Index == 2 && SwapThisWithSRet) + ++Index; + if (AI.getPaddingType()) { if (AI.getPaddingInReg()) PAL.push_back(llvm::AttributeSet::get(getLLVMContext(), Index, @@ -1138,14 +1245,15 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, else if (ParamType->isUnsignedIntegerOrEnumerationType()) Attrs.addAttribute(llvm::Attribute::ZExt); // FALL THROUGH - case ABIArgInfo::Direct: + case ABIArgInfo::Direct: { if (AI.getInReg()) Attrs.addAttribute(llvm::Attribute::InReg); // FIXME: handle sseregparm someday... - if (llvm::StructType *STy = - dyn_cast<llvm::StructType>(AI.getCoerceToType())) { + llvm::StructType *STy = + dyn_cast<llvm::StructType>(AI.getCoerceToType()); + if (!isAAPCSVFP(FI, getTarget()) && STy) { unsigned Extra = STy->getNumElements()-1; // 1 will be added below. if (Attrs.hasAttributes()) for (unsigned I = 0; I < Extra; ++I) @@ -1154,7 +1262,7 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, Index += Extra; } break; - + } case ABIArgInfo::Indirect: if (AI.getInReg()) Attrs.addAttribute(llvm::Attribute::InReg); @@ -1173,6 +1281,13 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, // Skip increment, no matching LLVM parameter. continue; + case ABIArgInfo::InAlloca: + // inalloca disables readnone and readonly. 
+ FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly) + .removeAttribute(llvm::Attribute::ReadNone); + // Skip increment, no matching LLVM parameter. + continue; + case ABIArgInfo::Expand: { SmallVector<llvm::Type*, 8> types; // FIXME: This is rather inefficient. Do we ever actually need to do @@ -1184,10 +1299,27 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, } } + if (const auto *RefTy = ParamType->getAs<ReferenceType>()) { + QualType PTy = RefTy->getPointeeType(); + if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) + Attrs.addDereferenceableAttr(getContext().getTypeSizeInChars(PTy) + .getQuantity()); + else if (getContext().getTargetAddressSpace(PTy) == 0) + Attrs.addAttribute(llvm::Attribute::NonNull); + } + if (Attrs.hasAttributes()) PAL.push_back(llvm::AttributeSet::get(getLLVMContext(), Index, Attrs)); ++Index; } + + // Add the inalloca attribute to the trailing inalloca parameter if present. + if (FI.usesInAlloca()) { + llvm::AttrBuilder Attrs; + Attrs.addAttribute(llvm::Attribute::InAlloca); + PAL.push_back(llvm::AttributeSet::get(getLLVMContext(), Index, Attrs)); + } + if (FuncAttrs.hasAttributes()) PAL.push_back(llvm:: AttributeSet::get(getLLVMContext(), @@ -1224,7 +1356,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // return statements. if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurCodeDecl)) { if (FD->hasImplicitReturnZero()) { - QualType RetTy = FD->getResultType().getUnqualifiedType(); + QualType RetTy = FD->getReturnType().getUnqualifiedType(); llvm::Type* LLVMTy = CGM.getTypes().ConvertType(RetTy); llvm::Constant* Zero = llvm::Constant::getNullValue(LLVMTy); Builder.CreateStore(Zero, ReturnValue); @@ -1237,15 +1369,48 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // Emit allocs for param decls. Give the LLVM Argument nodes names. llvm::Function::arg_iterator AI = Fn->arg_begin(); - // Name the struct return argument. - if (CGM.ReturnTypeUsesSRet(FI)) { + // If we're using inalloca, all the memory arguments are GEPs off of the last + // parameter, which is a pointer to the complete memory area. + llvm::Value *ArgStruct = nullptr; + if (FI.usesInAlloca()) { + llvm::Function::arg_iterator EI = Fn->arg_end(); + --EI; + ArgStruct = EI; + assert(ArgStruct->getType() == FI.getArgStruct()->getPointerTo()); + } + + // Name the struct return parameter, which can come first or second. + const ABIArgInfo &RetAI = FI.getReturnInfo(); + bool SwapThisWithSRet = false; + if (RetAI.isIndirect()) { + SwapThisWithSRet = RetAI.isSRetAfterThis(); + if (SwapThisWithSRet) + ++AI; AI->setName("agg.result"); - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), - AI->getArgNo() + 1, + AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), AI->getArgNo() + 1, llvm::Attribute::NoAlias)); - ++AI; + if (SwapThisWithSRet) + --AI; // Go back to the beginning for 'this'. + else + ++AI; // Skip the sret parameter. } + // Get the function-level nonnull attribute if it exists. + const NonNullAttr *NNAtt = + CurCodeDecl ? CurCodeDecl->getAttr<NonNullAttr>() : nullptr; + + // Track if we received the parameter as a pointer (indirect, byval, or + // inalloca). If already have a pointer, EmitParmDecl doesn't need to copy it + // into a local alloca for us. + enum ValOrPointer { HaveValue = 0, HavePointer = 1 }; + typedef llvm::PointerIntPair<llvm::Value *, 1> ValueAndIsPtr; + SmallVector<ValueAndIsPtr, 16> ArgVals; + ArgVals.reserve(Args.size()); + + // Create a pointer value for every parameter declaration. 
This usually + // entails copying one or more LLVM IR arguments into an alloca. Don't push + // any cleanups or do anything that might unwind. We do that separately, so + // we can push the cleanups in the correct order for the ABI. assert(FI.arg_size() == Args.size() && "Mismatch between function signature & arguments."); unsigned ArgNo = 1; @@ -1264,6 +1429,13 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, ++AI; switch (ArgI.getKind()) { + case ABIArgInfo::InAlloca: { + llvm::Value *V = Builder.CreateStructGEP( + ArgStruct, ArgI.getInAllocaFieldIndex(), Arg->getName()); + ArgVals.push_back(ValueAndIsPtr(V, HavePointer)); + continue; // Don't increment AI! + } + case ABIArgInfo::Indirect: { llvm::Value *V = AI; @@ -1290,6 +1462,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, false); V = AlignedTemp; } + ArgVals.push_back(ValueAndIsPtr(V, HavePointer)); } else { // Load scalar value from indirect argument. CharUnits Alignment = getContext().getTypeAlignInChars(Ty); @@ -1298,8 +1471,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, if (isPromoted) V = emitArgumentDemotion(*this, Arg, V); + ArgVals.push_back(ValueAndIsPtr(V, HaveValue)); } - EmitParmDecl(*Arg, V, ArgNo); break; } @@ -1313,6 +1486,49 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, assert(AI != Fn->arg_end() && "Argument mismatch!"); llvm::Value *V = AI; + if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(Arg)) { + if ((NNAtt && NNAtt->isNonNull(PVD->getFunctionScopeIndex())) || + PVD->hasAttr<NonNullAttr>()) + AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), + AI->getArgNo() + 1, + llvm::Attribute::NonNull)); + + QualType OTy = PVD->getOriginalType(); + if (const auto *ArrTy = + getContext().getAsConstantArrayType(OTy)) { + // A C99 array parameter declaration with the static keyword also + // indicates dereferenceability, and if the size is constant we can + // use the dereferenceable attribute (which requires the size in + // bytes). + if (ArrTy->getSizeModifier() == ArrayType::Static) { + QualType ETy = ArrTy->getElementType(); + uint64_t ArrSize = ArrTy->getSize().getZExtValue(); + if (!ETy->isIncompleteType() && ETy->isConstantSizeType() && + ArrSize) { + llvm::AttrBuilder Attrs; + Attrs.addDereferenceableAttr( + getContext().getTypeSizeInChars(ETy).getQuantity()*ArrSize); + AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), + AI->getArgNo() + 1, Attrs)); + } else if (getContext().getTargetAddressSpace(ETy) == 0) { + AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), + AI->getArgNo() + 1, + llvm::Attribute::NonNull)); + } + } + } else if (const auto *ArrTy = + getContext().getAsVariableArrayType(OTy)) { + // For C99 VLAs with the static keyword, we don't know the size so + // we can't use the dereferenceable attribute, but in addrspace(0) + // we know that it must be nonnull. 
+ if (ArrTy->getSizeModifier() == VariableArrayType::Static && + !getContext().getTargetAddressSpace(ArrTy->getElementType())) + AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), + AI->getArgNo() + 1, + llvm::Attribute::NonNull)); + } + } + if (Arg->getType().isRestrictQualified()) AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), AI->getArgNo() + 1, @@ -1340,7 +1556,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, if (V->getType() != LTy) V = Builder.CreateBitCast(V, LTy); - EmitParmDecl(*Arg, V, ArgNo); + ArgVals.push_back(ValueAndIsPtr(V, HaveValue)); break; } @@ -1368,8 +1584,10 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // If the coerce-to type is a first class aggregate, we flatten it and // pass the elements. Either way is semantically identical, but fast-isel // and the optimizer generally likes scalar values better than FCAs. + // We cannot do this for functions using the AAPCS calling convention, + // as structures are treated differently by that calling convention. llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType()); - if (STy && STy->getNumElements() > 1) { + if (!isAAPCSVFP(FI, getTarget()) && STy && STy->getNumElements() > 1) { uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy); llvm::Type *DstTy = cast<llvm::PointerType>(Ptr->getType())->getElementType(); @@ -1412,8 +1630,10 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, V = EmitLoadOfScalar(V, false, AlignmentToUse, Ty, Arg->getLocStart()); if (isPromoted) V = emitArgumentDemotion(*this, Arg, V); + ArgVals.push_back(ValueAndIsPtr(V, HaveValue)); + } else { + ArgVals.push_back(ValueAndIsPtr(V, HavePointer)); } - EmitParmDecl(*Arg, V, ArgNo); continue; // Skip ++AI increment, already done. } @@ -1426,7 +1646,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, Alloca->setAlignment(Align.getQuantity()); LValue LV = MakeAddrLValue(Alloca, Ty, Align); llvm::Function::arg_iterator End = ExpandTypeFromArgs(Ty, LV, AI); - EmitParmDecl(*Arg, Alloca, ArgNo); + ArgVals.push_back(ValueAndIsPtr(Alloca, HavePointer)); // Name the arguments used in expansion and increment AI. unsigned Index = 0; @@ -1437,19 +1657,36 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, case ABIArgInfo::Ignore: // Initialize the local variable appropriately. - if (!hasScalarEvaluationKind(Ty)) - EmitParmDecl(*Arg, CreateMemTemp(Ty), ArgNo); - else - EmitParmDecl(*Arg, llvm::UndefValue::get(ConvertType(Arg->getType())), - ArgNo); + if (!hasScalarEvaluationKind(Ty)) { + ArgVals.push_back(ValueAndIsPtr(CreateMemTemp(Ty), HavePointer)); + } else { + llvm::Value *U = llvm::UndefValue::get(ConvertType(Arg->getType())); + ArgVals.push_back(ValueAndIsPtr(U, HaveValue)); + } // Skip increment, no matching LLVM parameter. continue; } ++AI; + + if (ArgNo == 1 && SwapThisWithSRet) + ++AI; // Skip the sret parameter. 
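// [Editor's illustration, not part of this commit.] The SwapThisWithSRet
// handling above matters for instance methods that return a class by value on
// ABIs where isSRetAfterThis() is true: the hidden return slot becomes the
// second IR parameter, after 'this'. Roughly (names hypothetical):
//
//   struct Big { int data[8]; };
//   struct Widget { Big make() const; };
//
//   ; conceptual IR signature with the sret slot after 'this':
//   ; void @make(%struct.Widget* %this, %struct.Big* sret %agg.result)
//
// so the prologue names and skips the sret parameter at position 1 rather
// than position 0.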
} + + if (FI.usesInAlloca()) + ++AI; assert(AI == Fn->arg_end() && "Argument mismatch!"); + + if (getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) { + for (int I = Args.size() - 1; I >= 0; --I) + EmitParmDecl(*Args[I], ArgVals[I].getPointer(), ArgVals[I].getInt(), + I + 1); + } else { + for (unsigned I = 0, E = Args.size(); I != E; ++I) + EmitParmDecl(*Args[I], ArgVals[I].getPointer(), ArgVals[I].getInt(), + I + 1); + } } static void eraseUnusedBitCasts(llvm::Instruction *insn) { @@ -1468,8 +1705,8 @@ static llvm::Value *tryEmitFusedAutoreleaseOfResult(CodeGenFunction &CGF, llvm::Value *result) { // We must be immediately followed the cast. llvm::BasicBlock *BB = CGF.Builder.GetInsertBlock(); - if (BB->empty()) return 0; - if (&BB->back() != result) return 0; + if (BB->empty()) return nullptr; + if (&BB->back() != result) return nullptr; llvm::Type *resultType = result->getType(); @@ -1487,7 +1724,7 @@ static llvm::Value *tryEmitFusedAutoreleaseOfResult(CodeGenFunction &CGF, // Require the generator to be immediately followed by the cast. if (generator->getNextNode() != bitcast) - return 0; + return nullptr; insnsToKill.push_back(bitcast); } @@ -1497,7 +1734,7 @@ static llvm::Value *tryEmitFusedAutoreleaseOfResult(CodeGenFunction &CGF, // or // %generator = call i8* @objc_retainAutoreleasedReturnValue(i8* %originalResult) llvm::CallInst *call = dyn_cast<llvm::CallInst>(generator); - if (!call) return 0; + if (!call) return nullptr; bool doRetainAutorelease; @@ -1525,7 +1762,7 @@ static llvm::Value *tryEmitFusedAutoreleaseOfResult(CodeGenFunction &CGF, insnsToKill.push_back(prev); } } else { - return 0; + return nullptr; } result = call->getArgOperand(0); @@ -1558,16 +1795,16 @@ static llvm::Value *tryRemoveRetainOfSelf(CodeGenFunction &CGF, // This is only applicable to a method with an immutable 'self'. const ObjCMethodDecl *method = dyn_cast_or_null<ObjCMethodDecl>(CGF.CurCodeDecl); - if (!method) return 0; + if (!method) return nullptr; const VarDecl *self = method->getSelfDecl(); - if (!self->getType().isConstQualified()) return 0; + if (!self->getType().isConstQualified()) return nullptr; // Look for a retain call. llvm::CallInst *retainCall = dyn_cast<llvm::CallInst>(result->stripPointerCasts()); if (!retainCall || retainCall->getCalledValue() != CGF.CGM.getARCEntrypoints().objc_retain) - return 0; + return nullptr; // Look for an ordinary load of 'self'. llvm::Value *retainedValue = retainCall->getArgOperand(0); @@ -1575,7 +1812,7 @@ static llvm::Value *tryRemoveRetainOfSelf(CodeGenFunction &CGF, dyn_cast<llvm::LoadInst>(retainedValue->stripPointerCasts()); if (!load || load->isAtomic() || load->isVolatile() || load->getPointerOperand() != CGF.GetAddrOfLocalVar(self)) - return 0; + return nullptr; // Okay! Burn it all down. This relies for correctness on the // assumption that the retain is emitted as part of the return and @@ -1617,17 +1854,17 @@ static llvm::StoreInst *findDominatingStoreToReturnValue(CodeGenFunction &CGF) { // with noreturn cleanups. 
if (!CGF.ReturnValue->hasOneUse()) { llvm::BasicBlock *IP = CGF.Builder.GetInsertBlock(); - if (IP->empty()) return 0; + if (IP->empty()) return nullptr; llvm::StoreInst *store = dyn_cast<llvm::StoreInst>(&IP->back()); - if (!store) return 0; - if (store->getPointerOperand() != CGF.ReturnValue) return 0; + if (!store) return nullptr; + if (store->getPointerOperand() != CGF.ReturnValue) return nullptr; assert(!store->isAtomic() && !store->isVolatile()); // see below return store; } llvm::StoreInst *store = - dyn_cast<llvm::StoreInst>(CGF.ReturnValue->use_back()); - if (!store) return 0; + dyn_cast<llvm::StoreInst>(CGF.ReturnValue->user_back()); + if (!store) return nullptr; // These aren't actually possible for non-coerced returns, and we // only care about non-coerced returns on this code path. @@ -1639,7 +1876,7 @@ static llvm::StoreInst *findDominatingStoreToReturnValue(CodeGenFunction &CGF) { llvm::BasicBlock *IP = CGF.Builder.GetInsertBlock(); while (IP != StoreBB) { if (!(IP = IP->getSinglePredecessor())) - return 0; + return nullptr; } // Okay, the store's basic block dominates the insertion point; we @@ -1651,25 +1888,41 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, bool EmitRetDbgLoc, SourceLocation EndLoc) { // Functions with no result always return void. - if (ReturnValue == 0) { + if (!ReturnValue) { Builder.CreateRetVoid(); return; } llvm::DebugLoc RetDbgLoc; - llvm::Value *RV = 0; + llvm::Value *RV = nullptr; QualType RetTy = FI.getReturnType(); const ABIArgInfo &RetAI = FI.getReturnInfo(); switch (RetAI.getKind()) { + case ABIArgInfo::InAlloca: + // Aggregrates get evaluated directly into the destination. Sometimes we + // need to return the sret value in a register, though. + assert(hasAggregateEvaluationKind(RetTy)); + if (RetAI.getInAllocaSRet()) { + llvm::Function::arg_iterator EI = CurFn->arg_end(); + --EI; + llvm::Value *ArgStruct = EI; + llvm::Value *SRet = + Builder.CreateStructGEP(ArgStruct, RetAI.getInAllocaFieldIndex()); + RV = Builder.CreateLoad(SRet, "sret"); + } + break; + case ABIArgInfo::Indirect: { + auto AI = CurFn->arg_begin(); + if (RetAI.isSRetAfterThis()) + ++AI; switch (getEvaluationKind(RetTy)) { case TEK_Complex: { ComplexPairTy RT = EmitLoadOfComplex(MakeNaturalAlignAddrLValue(ReturnValue, RetTy), EndLoc); - EmitStoreOfComplex(RT, - MakeNaturalAlignAddrLValue(CurFn->arg_begin(), RetTy), + EmitStoreOfComplex(RT, MakeNaturalAlignAddrLValue(AI, RetTy), /*isInit*/ true); break; } @@ -1678,7 +1931,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, break; case TEK_Scalar: EmitStoreOfScalar(Builder.CreateLoad(ReturnValue), - MakeNaturalAlignAddrLValue(CurFn->arg_begin(), RetTy), + MakeNaturalAlignAddrLValue(AI, RetTy), /*isInit*/ true); break; } @@ -1707,7 +1960,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, // If that was the only use of the return value, nuke it as well now. if (ReturnValue->use_empty() && isa<llvm::AllocaInst>(ReturnValue)) { cast<llvm::AllocaInst>(ReturnValue)->eraseFromParent(); - ReturnValue = 0; + ReturnValue = nullptr; } // Otherwise, we have to do a simple load. 
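// [Editor's illustration, not part of this commit.] The epilogue cases above
// differ only in where the hidden return slot comes from: with a plain
// indirect ('sret') return the callee stores through a pointer parameter,
// while with inalloca the slot pointer is itself read back out of the
// argument memory block (the "sret" load above). A minimal function that
// takes the aggregate-return path on common targets (names hypothetical):

struct Result { int values[16]; };   // too large to be returned in registers
Result makeResult() {
  Result r = {};                     // typically constructed in the caller-provided slot
  return r;
}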
@@ -1750,6 +2003,25 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, Ret->setDebugLoc(RetDbgLoc); } +static bool isInAllocaArgument(CGCXXABI &ABI, QualType type) { + const CXXRecordDecl *RD = type->getAsCXXRecordDecl(); + return RD && ABI.getRecordArgABI(RD) == CGCXXABI::RAA_DirectInMemory; +} + +static AggValueSlot createPlaceholderSlot(CodeGenFunction &CGF, QualType Ty) { + // FIXME: Generate IR in one pass, rather than going back and fixing up these + // placeholders. + llvm::Type *IRTy = CGF.ConvertTypeForMem(Ty); + llvm::Value *Placeholder = + llvm::UndefValue::get(IRTy->getPointerTo()->getPointerTo()); + Placeholder = CGF.Builder.CreateLoad(Placeholder); + return AggValueSlot::forAddr(Placeholder, CharUnits::Zero(), + Ty.getQualifiers(), + AggValueSlot::IsNotDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased); +} + void CodeGenFunction::EmitDelegateCallArg(CallArgList &args, const VarDecl *param, SourceLocation loc) { @@ -1773,6 +2045,20 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args, return args.add(RValue::get(Builder.CreateLoad(local)), type); } + if (isInAllocaArgument(CGM.getCXXABI(), type)) { + AggValueSlot Slot = createPlaceholderSlot(*this, type); + Slot.setExternallyDestructed(); + + // FIXME: Either emit a copy constructor call, or figure out how to do + // guaranteed tail calls with perfect forwarding in LLVM. + CGM.ErrorUnsupported(param, "non-trivial argument copy for thunk"); + EmitNullInitialization(Slot.getAddr(), type); + + RValue RV = Slot.asRValue(); + args.add(RV, type); + return; + } + args.add(convertTempToRValue(local, type, loc), type); } @@ -1792,7 +2078,7 @@ static void emitWriteback(CodeGenFunction &CGF, assert(!isProvablyNull(srcAddr) && "shouldn't have writeback for provably null argument"); - llvm::BasicBlock *contBB = 0; + llvm::BasicBlock *contBB = nullptr; // If the argument wasn't provably non-null, we need to null check // before doing the store. @@ -1852,14 +2138,13 @@ static void emitWriteback(CodeGenFunction &CGF, static void emitWritebacks(CodeGenFunction &CGF, const CallArgList &args) { - for (CallArgList::writeback_iterator - i = args.writeback_begin(), e = args.writeback_end(); i != e; ++i) - emitWriteback(CGF, *i); + for (const auto &I : args.writebacks()) + emitWriteback(CGF, I); } static void deactivateArgCleanupsBeforeCall(CodeGenFunction &CGF, const CallArgList &CallArgs) { - assert(CGF.getTarget().getCXXABI().isArgumentDestroyedByCallee()); + assert(CGF.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()); ArrayRef<CallArgList::CallArgCleanup> Cleanups = CallArgs.getCleanupsToDeactivate(); // Iterate in reverse to increase the likelihood of popping the cleanup. @@ -1874,7 +2159,7 @@ static const Expr *maybeGetUnaryAddrOfOperand(const Expr *E) { if (const UnaryOperator *uop = dyn_cast<UnaryOperator>(E->IgnoreParens())) if (uop->getOpcode() == UO_AddrOf) return uop->getSubExpr(); - return 0; + return nullptr; } /// Emit an argument that's being passed call-by-writeback. That is, @@ -1928,9 +2213,9 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args, cast<llvm::PointerType>(destType->getElementType())); CGF.Builder.CreateStore(null, temp); } - - llvm::BasicBlock *contBB = 0; - llvm::BasicBlock *originBB = 0; + + llvm::BasicBlock *contBB = nullptr; + llvm::BasicBlock *originBB = nullptr; // If the address is *not* known to be non-null, we need to switch. 
llvm::Value *finalArgument; @@ -1957,7 +2242,7 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args, } } - llvm::Value *valueToUse = 0; + llvm::Value *valueToUse = nullptr; // Perform a copy if necessary. if (shouldCopy) { @@ -2004,6 +2289,99 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args, args.add(RValue::get(finalArgument), CRE->getType()); } +void CallArgList::allocateArgumentMemory(CodeGenFunction &CGF) { + assert(!StackBase && !StackCleanup.isValid()); + + // Save the stack. + llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stacksave); + StackBase = CGF.Builder.CreateCall(F, "inalloca.save"); + + // Control gets really tied up in landing pads, so we have to spill the + // stacksave to an alloca to avoid violating SSA form. + // TODO: This is dead if we never emit the cleanup. We should create the + // alloca and store lazily on the first cleanup emission. + StackBaseMem = CGF.CreateTempAlloca(CGF.Int8PtrTy, "inalloca.spmem"); + CGF.Builder.CreateStore(StackBase, StackBaseMem); + CGF.pushStackRestore(EHCleanup, StackBaseMem); + StackCleanup = CGF.EHStack.getInnermostEHScope(); + assert(StackCleanup.isValid()); +} + +void CallArgList::freeArgumentMemory(CodeGenFunction &CGF) const { + if (StackBase) { + CGF.DeactivateCleanupBlock(StackCleanup, StackBase); + llvm::Value *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore); + // We could load StackBase from StackBaseMem, but in the non-exceptional + // case we can skip it. + CGF.Builder.CreateCall(F, StackBase); + } +} + +void CodeGenFunction::EmitCallArgs(CallArgList &Args, + ArrayRef<QualType> ArgTypes, + CallExpr::const_arg_iterator ArgBeg, + CallExpr::const_arg_iterator ArgEnd, + bool ForceColumnInfo) { + CGDebugInfo *DI = getDebugInfo(); + SourceLocation CallLoc; + if (DI) CallLoc = DI->getLocation(); + + // We *have* to evaluate arguments from right to left in the MS C++ ABI, + // because arguments are destroyed left to right in the callee. + if (CGM.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) { + // Insert a stack save if we're going to need any inalloca args. + bool HasInAllocaArgs = false; + for (ArrayRef<QualType>::iterator I = ArgTypes.begin(), E = ArgTypes.end(); + I != E && !HasInAllocaArgs; ++I) + HasInAllocaArgs = isInAllocaArgument(CGM.getCXXABI(), *I); + if (HasInAllocaArgs) { + assert(getTarget().getTriple().getArch() == llvm::Triple::x86); + Args.allocateArgumentMemory(*this); + } + + // Evaluate each argument. + size_t CallArgsStart = Args.size(); + for (int I = ArgTypes.size() - 1; I >= 0; --I) { + CallExpr::const_arg_iterator Arg = ArgBeg + I; + EmitCallArg(Args, *Arg, ArgTypes[I]); + // Restore the debug location. + if (DI) DI->EmitLocation(Builder, CallLoc, ForceColumnInfo); + } + + // Un-reverse the arguments we just evaluated so they match up with the LLVM + // IR function. + std::reverse(Args.begin() + CallArgsStart, Args.end()); + return; + } + + for (unsigned I = 0, E = ArgTypes.size(); I != E; ++I) { + CallExpr::const_arg_iterator Arg = ArgBeg + I; + assert(Arg != ArgEnd); + EmitCallArg(Args, *Arg, ArgTypes[I]); + // Restore the debug location. 
+ if (DI) DI->EmitLocation(Builder, CallLoc, ForceColumnInfo); + } +} + +namespace { + +struct DestroyUnpassedArg : EHScopeStack::Cleanup { + DestroyUnpassedArg(llvm::Value *Addr, QualType Ty) + : Addr(Addr), Ty(Ty) {} + + llvm::Value *Addr; + QualType Ty; + + void Emit(CodeGenFunction &CGF, Flags flags) override { + const CXXDestructorDecl *Dtor = Ty->getAsCXXRecordDecl()->getDestructor(); + assert(!Dtor->isTrivial()); + CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*for vbase*/ false, + /*Delegating=*/false, Addr); + } +}; + +} + void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, QualType type) { if (const ObjCIndirectCopyRestoreExpr *CRE @@ -2027,22 +2405,36 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, // However, we still have to push an EH-only cleanup in case we unwind before // we make it to the call. if (HasAggregateEvalKind && - CGM.getTarget().getCXXABI().isArgumentDestroyedByCallee()) { + CGM.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) { + // If we're using inalloca, use the argument memory. Otherwise, use a + // temporary. + AggValueSlot Slot; + if (args.isUsingInAlloca()) + Slot = createPlaceholderSlot(*this, type); + else + Slot = CreateAggTemp(type, "agg.tmp"); + const CXXRecordDecl *RD = type->getAsCXXRecordDecl(); - if (RD && RD->hasNonTrivialDestructor()) { - AggValueSlot Slot = CreateAggTemp(type, "agg.arg.tmp"); + bool DestroyedInCallee = + RD && RD->hasNonTrivialDestructor() && + CGM.getCXXABI().getRecordArgABI(RD) != CGCXXABI::RAA_Default; + if (DestroyedInCallee) Slot.setExternallyDestructed(); - EmitAggExpr(E, Slot); - RValue RV = Slot.asRValue(); - args.add(RV, type); - pushDestroy(EHCleanup, RV.getAggregateAddr(), type, destroyCXXObject, - /*useEHCleanupForArray*/ true); + EmitAggExpr(E, Slot); + RValue RV = Slot.asRValue(); + args.add(RV, type); + + if (DestroyedInCallee) { + // Create a no-op GEP between the placeholder and the cleanup so we can + // RAUW it successfully. It also serves as a marker of the first + // instruction where the cleanup is active. + pushFullExprCleanup<DestroyUnpassedArg>(EHCleanup, Slot.getAddr(), type); // This unreachable is a temporary marker which will be removed later. 
llvm::Instruction *IsActive = Builder.CreateUnreachable(); args.addArgCleanupDeactivation(EHStack.getInnermostEHScope(), IsActive); - return; } + return; } if (HasAggregateEvalKind && isa<ImplicitCastExpr>(E) && @@ -2128,6 +2520,7 @@ void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee, call->setCallingConv(getRuntimeCC()); Builder.CreateUnreachable(); } + PGO.setCurrentRegionUnreachable(); } /// Emits a call or invoke instruction to the given nullary runtime @@ -2206,12 +2599,10 @@ void CodeGenFunction::ExpandTypeToArgs(QualType Ty, RValue RV, LValue LV = MakeAddrLValue(RV.getAggregateAddr(), Ty); if (RD->isUnion()) { - const FieldDecl *LargestFD = 0; + const FieldDecl *LargestFD = nullptr; CharUnits UnionSize = CharUnits::Zero(); - for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); - i != e; ++i) { - const FieldDecl *FD = *i; + for (const auto *FD : RD->fields()) { assert(!FD->isBitField() && "Cannot expand structure with bit-field members."); CharUnits FieldSize = getContext().getTypeSizeInChars(FD->getType()); @@ -2225,10 +2616,7 @@ void CodeGenFunction::ExpandTypeToArgs(QualType Ty, RValue RV, ExpandTypeToArgs(LargestFD->getType(), FldRV, Args, IRFuncTy); } } else { - for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); - i != e; ++i) { - FieldDecl *FD = *i; - + for (const auto *FD : RD->fields()) { RValue FldRV = EmitRValueForField(LV, FD, SourceLocation()); ExpandTypeToArgs(FD->getType(), FldRV, Args, IRFuncTy); } @@ -2251,6 +2639,20 @@ void CodeGenFunction::ExpandTypeToArgs(QualType Ty, RValue RV, } } +/// \brief Store a non-aggregate value to an address to initialize it. For +/// initialization, a non-atomic store will be used. +static void EmitInitStoreOfNonAggregate(CodeGenFunction &CGF, RValue Src, + LValue Dst) { + if (Src.isScalar()) + CGF.EmitStoreOfScalar(Src.getScalarVal(), Dst, /*init=*/true); + else + CGF.EmitStoreOfComplex(Src.getComplexVal(), Dst, /*init=*/true); +} + +void CodeGenFunction::deferPlaceholderReplacement(llvm::Instruction *Old, + llvm::Value *New) { + DeferredReplacements.push_back(std::make_pair(Old, New)); +} RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, llvm::Value *Callee, @@ -2272,14 +2674,44 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, cast<llvm::FunctionType>( cast<llvm::PointerType>(Callee->getType())->getElementType()); + // If we're using inalloca, insert the allocation after the stack save. + // FIXME: Do this earlier rather than hacking it in here! + llvm::Value *ArgMemory = nullptr; + if (llvm::StructType *ArgStruct = CallInfo.getArgStruct()) { + llvm::Instruction *IP = CallArgs.getStackBase(); + llvm::AllocaInst *AI; + if (IP) { + IP = IP->getNextNode(); + AI = new llvm::AllocaInst(ArgStruct, "argmem", IP); + } else { + AI = CreateTempAlloca(ArgStruct, "argmem"); + } + AI->setUsedWithInAlloca(true); + assert(AI->isUsedWithInAlloca() && !AI->isStaticAlloca()); + ArgMemory = AI; + } + // If the call returns a temporary with struct return, create a temporary // alloca to hold the result, unless one is given to us. 
- if (CGM.ReturnTypeUsesSRet(CallInfo)) { - llvm::Value *Value = ReturnValue.getValue(); - if (!Value) - Value = CreateMemTemp(RetTy); - Args.push_back(Value); - checkArgMatches(Value, IRArgNo, IRFuncTy); + llvm::Value *SRetPtr = nullptr; + bool SwapThisWithSRet = false; + if (RetAI.isIndirect() || RetAI.isInAlloca()) { + SRetPtr = ReturnValue.getValue(); + if (!SRetPtr) + SRetPtr = CreateMemTemp(RetTy); + if (RetAI.isIndirect()) { + Args.push_back(SRetPtr); + SwapThisWithSRet = RetAI.isSRetAfterThis(); + if (SwapThisWithSRet) + IRArgNo = 1; + checkArgMatches(SRetPtr, IRArgNo, IRFuncTy); + if (SwapThisWithSRet) + IRArgNo = 0; + } else { + llvm::Value *Addr = + Builder.CreateStructGEP(ArgMemory, RetAI.getInAllocaFieldIndex()); + Builder.CreateStore(SRetPtr, Addr); + } } assert(CallInfo.arg_size() == CallArgs.size() && @@ -2290,6 +2722,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, const ABIArgInfo &ArgInfo = info_it->info; RValue RV = I->RV; + // Skip 'sret' if it came second. + if (IRArgNo == 1 && SwapThisWithSRet) + ++IRArgNo; + CharUnits TypeAlign = getContext().getTypeAlignInChars(I->Ty); // Insert a padding argument to ensure proper alignment. @@ -2299,6 +2735,35 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } switch (ArgInfo.getKind()) { + case ABIArgInfo::InAlloca: { + assert(getTarget().getTriple().getArch() == llvm::Triple::x86); + if (RV.isAggregate()) { + // Replace the placeholder with the appropriate argument slot GEP. + llvm::Instruction *Placeholder = + cast<llvm::Instruction>(RV.getAggregateAddr()); + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(Placeholder); + llvm::Value *Addr = Builder.CreateStructGEP( + ArgMemory, ArgInfo.getInAllocaFieldIndex()); + Builder.restoreIP(IP); + deferPlaceholderReplacement(Placeholder, Addr); + } else { + // Store the RValue into the argument struct. + llvm::Value *Addr = + Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex()); + unsigned AS = Addr->getType()->getPointerAddressSpace(); + llvm::Type *MemType = ConvertTypeForMem(I->Ty)->getPointerTo(AS); + // There are some cases where a trivial bitcast is not avoidable. The + // definition of a type later in a translation unit may change it's type + // from {}* to (%struct.foo*)*. + if (Addr->getType() != MemType) + Addr = Builder.CreateBitCast(Addr, MemType); + LValue argLV = MakeAddrLValue(Addr, I->Ty, TypeAlign); + EmitInitStoreOfNonAggregate(*this, RV, argLV); + } + break; // Don't increment IRArgNo! + } + case ABIArgInfo::Indirect: { if (RV.isScalar() || RV.isComplex()) { // Make a temporary alloca to pass the argument. @@ -2307,13 +2772,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, AI->setAlignment(ArgInfo.getIndirectAlign()); Args.push_back(AI); - LValue argLV = - MakeAddrLValue(Args.back(), I->Ty, TypeAlign); - - if (RV.isScalar()) - EmitStoreOfScalar(RV.getScalarVal(), argLV, /*init*/ true); - else - EmitStoreOfComplex(RV.getComplexVal(), argLV, /*init*/ true); + LValue argLV = MakeAddrLValue(Args.back(), I->Ty, TypeAlign); + EmitInitStoreOfNonAggregate(*this, RV, argLV); // Validate argument match. 
checkArgMatches(AI, IRArgNo, IRFuncTy); @@ -2386,11 +2846,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (RV.isScalar() || RV.isComplex()) { SrcPtr = CreateMemTemp(I->Ty, "coerce"); LValue SrcLV = MakeAddrLValue(SrcPtr, I->Ty, TypeAlign); - if (RV.isScalar()) { - EmitStoreOfScalar(RV.getScalarVal(), SrcLV, /*init*/ true); - } else { - EmitStoreOfComplex(RV.getComplexVal(), SrcLV, /*init*/ true); - } + EmitInitStoreOfNonAggregate(*this, RV, SrcLV); } else SrcPtr = RV.getAggregateAddr(); @@ -2406,8 +2862,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // If the coerce-to type is a first class aggregate, we flatten it and // pass the elements. Either way is semantically identical, but fast-isel // and the optimizer generally likes scalar values better than FCAs. - if (llvm::StructType *STy = - dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType())) { + // We cannot do this for functions using the AAPCS calling convention, + // as structures are treated differently by that calling convention. + llvm::StructType *STy = + dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType()); + if (STy && !isAAPCSVFP(CallInfo, getTarget())) { llvm::Type *SrcTy = cast<llvm::PointerType>(SrcPtr->getType())->getElementType(); uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy); @@ -2456,6 +2915,42 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } } + if (SwapThisWithSRet) + std::swap(Args[0], Args[1]); + + if (ArgMemory) { + llvm::Value *Arg = ArgMemory; + if (CallInfo.isVariadic()) { + // When passing non-POD arguments by value to variadic functions, we will + // end up with a variadic prototype and an inalloca call site. In such + // cases, we can't do any parameter mismatch checks. Give up and bitcast + // the callee. + unsigned CalleeAS = + cast<llvm::PointerType>(Callee->getType())->getAddressSpace(); + Callee = Builder.CreateBitCast( + Callee, getTypes().GetFunctionType(CallInfo)->getPointerTo(CalleeAS)); + } else { + llvm::Type *LastParamTy = + IRFuncTy->getParamType(IRFuncTy->getNumParams() - 1); + if (Arg->getType() != LastParamTy) { +#ifndef NDEBUG + // Assert that these structs have equivalent element types. 
+ llvm::StructType *FullTy = CallInfo.getArgStruct(); + llvm::StructType *DeclaredTy = cast<llvm::StructType>( + cast<llvm::PointerType>(LastParamTy)->getElementType()); + assert(DeclaredTy->getNumElements() == FullTy->getNumElements()); + for (llvm::StructType::element_iterator DI = DeclaredTy->element_begin(), + DE = DeclaredTy->element_end(), + FI = FullTy->element_begin(); + DI != DE; ++DI, ++FI) + assert(*DI == *FI); +#endif + Arg = Builder.CreateBitCast(Arg, LastParamTy); + } + } + Args.push_back(Arg); + } + if (!CallArgs.getCleanupsToDeactivate().empty()) deactivateArgCleanupsBeforeCall(*this, CallArgs); @@ -2496,7 +2991,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, llvm::AttributeSet Attrs = llvm::AttributeSet::get(getLLVMContext(), AttributeList); - llvm::BasicBlock *InvokeDest = 0; + llvm::BasicBlock *InvokeDest = nullptr; if (!Attrs.hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::NoUnwind)) InvokeDest = getInvokeDest(); @@ -2512,6 +3007,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (callOrInvoke) *callOrInvoke = CS.getInstruction(); + if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() && + !CS.hasFnAttr(llvm::Attribute::NoInline)) + Attrs = + Attrs.addAttribute(getLLVMContext(), llvm::AttributeSet::FunctionIndex, + llvm::Attribute::AlwaysInline); + CS.setAttributes(Attrs); CS.setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); @@ -2545,9 +3046,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (CallArgs.hasWritebacks()) emitWritebacks(*this, CallArgs); + // The stack cleanup for inalloca arguments has to run out of the normal + // lexical order, so deactivate it and run it manually here. + CallArgs.freeArgumentMemory(*this); + switch (RetAI.getKind()) { + case ABIArgInfo::InAlloca: case ABIArgInfo::Indirect: - return convertTempToRValue(Args[0], RetTy, SourceLocation()); + return convertTempToRValue(SRetPtr, RetTy, SourceLocation()); case ABIArgInfo::Ignore: // If we are ignoring an argument that had a result, make sure to diff --git a/lib/CodeGen/CGCall.h b/lib/CodeGen/CGCall.h index 532cb59c62e5..9510a1cd5461 100644 --- a/lib/CodeGen/CGCall.h +++ b/lib/CodeGen/CGCall.h @@ -56,6 +56,8 @@ namespace CodeGen { class CallArgList : public SmallVector<CallArg, 16> { public: + CallArgList() : StackBase(nullptr), StackBaseMem(nullptr) {} + struct Writeback { /// The original argument. Note that the argument l-value /// is potentially null. @@ -97,9 +99,12 @@ namespace CodeGen { bool hasWritebacks() const { return !Writebacks.empty(); } - typedef SmallVectorImpl<Writeback>::const_iterator writeback_iterator; - writeback_iterator writeback_begin() const { return Writebacks.begin(); } - writeback_iterator writeback_end() const { return Writebacks.end(); } + typedef llvm::iterator_range<SmallVectorImpl<Writeback>::const_iterator> + writeback_const_range; + + writeback_const_range writebacks() const { + return writeback_const_range(Writebacks.begin(), Writebacks.end()); + } void addArgCleanupDeactivation(EHScopeStack::stable_iterator Cleanup, llvm::Instruction *IsActiveIP) { @@ -113,6 +118,14 @@ namespace CodeGen { return CleanupsToDeactivate; } + void allocateArgumentMemory(CodeGenFunction &CGF); + llvm::Instruction *getStackBase() const { return StackBase; } + void freeArgumentMemory(CodeGenFunction &CGF) const; + + /// \brief Returns if we're using an inalloca struct to pass arguments in + /// memory. 
+ bool isUsingInAlloca() const { return StackBase; } + private: SmallVector<Writeback, 1> Writebacks; @@ -120,6 +133,17 @@ namespace CodeGen { /// is used to cleanup objects that are owned by the callee once the call /// occurs. SmallVector<CallArgCleanup, 1> CleanupsToDeactivate; + + /// The stacksave call. It dominates all of the argument evaluation. + llvm::CallInst *StackBase; + + /// The alloca holding the stackbase. We need it to maintain SSA form. + llvm::AllocaInst *StackBaseMem; + + /// The iterator pointing to the stack restore cleanup. We manually run and + /// deactivate this cleanup after the call in the unexceptional case because + /// it doesn't run in the normal order. + EHScopeStack::stable_iterator StackCleanup; }; /// FunctionArgList - Type for representing both the decl and type diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp index 4848d7565d40..9427de14d704 100644 --- a/lib/CodeGen/CGClass.cpp +++ b/lib/CodeGen/CGClass.cpp @@ -12,10 +12,10 @@ //===----------------------------------------------------------------------===// #include "CGBlocks.h" +#include "CGCXXABI.h" #include "CGDebugInfo.h" #include "CGRecordLayout.h" #include "CodeGenFunction.h" -#include "CGCXXABI.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/EvaluatedExprVisitor.h" @@ -66,8 +66,8 @@ CodeGenModule::GetNonVirtualBaseClassOffset(const CXXRecordDecl *ClassDecl, ComputeNonVirtualBaseClassOffset(getContext(), ClassDecl, PathBegin, PathEnd); if (Offset.isZero()) - return 0; - + return nullptr; + llvm::Type *PtrDiffTy = Types.ConvertType(getContext().getPointerDiffType()); @@ -114,7 +114,7 @@ ApplyNonVirtualAndVirtualOffset(CodeGenFunction &CGF, llvm::Value *ptr, CharUnits nonVirtualOffset, llvm::Value *virtualOffset) { // Assert that we have something to do. - assert(!nonVirtualOffset.isZero() || virtualOffset != 0); + assert(!nonVirtualOffset.isZero() || virtualOffset != nullptr); // Compute the offset from the static and dynamic components. llvm::Value *baseOffset; @@ -143,8 +143,8 @@ CodeGenFunction::GetAddressOfBaseClass(llvm::Value *Value, assert(PathBegin != PathEnd && "Base path should not be empty!"); CastExpr::path_const_iterator Start = PathBegin; - const CXXRecordDecl *VBase = 0; - + const CXXRecordDecl *VBase = nullptr; + // Sema has done some convenient canonicalization here: if the // access path involved any virtual steps, the conversion path will // *start* with a step down to the correct virtual base subobject, @@ -169,7 +169,7 @@ CodeGenFunction::GetAddressOfBaseClass(llvm::Value *Value, const ASTRecordLayout &layout = getContext().getASTRecordLayout(Derived); CharUnits vBaseOffset = layout.getVBaseClassOffset(VBase); NonVirtualOffset += vBaseOffset; - VBase = 0; // we no longer have a virtual step + VBase = nullptr; // we no longer have a virtual step } // Get the base pointer type. @@ -180,11 +180,11 @@ CodeGenFunction::GetAddressOfBaseClass(llvm::Value *Value, // just do a bitcast; null checks are unnecessary. if (NonVirtualOffset.isZero() && !VBase) { return Builder.CreateBitCast(Value, BasePtrTy); - } + } + + llvm::BasicBlock *origBB = nullptr; + llvm::BasicBlock *endBB = nullptr; - llvm::BasicBlock *origBB = 0; - llvm::BasicBlock *endBB = 0; - // Skip over the offset (and the vtable load) if we're supposed to // null-check the pointer. if (NullCheckValue) { @@ -198,7 +198,7 @@ CodeGenFunction::GetAddressOfBaseClass(llvm::Value *Value, } // Compute the virtual offset. 
- llvm::Value *VirtualOffset = 0; + llvm::Value *VirtualOffset = nullptr; if (VBase) { VirtualOffset = CGM.getCXXABI().GetVirtualBaseClassOffset(*this, Value, Derived, VBase); @@ -246,11 +246,11 @@ CodeGenFunction::GetAddressOfDerivedClass(llvm::Value *Value, // No offset, we can just cast back. return Builder.CreateBitCast(Value, DerivedPtrTy); } - - llvm::BasicBlock *CastNull = 0; - llvm::BasicBlock *CastNotNull = 0; - llvm::BasicBlock *CastEnd = 0; - + + llvm::BasicBlock *CastNull = nullptr; + llvm::BasicBlock *CastNotNull = nullptr; + llvm::BasicBlock *CastEnd = nullptr; + if (NullCheckValue) { CastNull = createBasicBlock("cast.null"); CastNotNull = createBasicBlock("cast.notnull"); @@ -290,7 +290,7 @@ llvm::Value *CodeGenFunction::GetVTTParameter(GlobalDecl GD, bool Delegating) { if (!CGM.getCXXABI().NeedsVTTParameter(GD)) { // This constructor/destructor does not need a VTT parameter. - return 0; + return nullptr; } const CXXRecordDecl *RD = cast<CXXMethodDecl>(CurCodeDecl)->getParent(); @@ -342,7 +342,7 @@ namespace { CallBaseDtor(const CXXRecordDecl *Base, bool BaseIsVirtual) : BaseClass(Base), BaseIsVirtual(BaseIsVirtual) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { const CXXRecordDecl *DerivedClass = cast<CXXMethodDecl>(CGF.CurCodeDecl)->getParent(); @@ -454,7 +454,7 @@ static void EmitAggMemberInitializer(CodeGenFunction &CGF, switch (CGF.getEvaluationKind(T)) { case TEK_Scalar: - CGF.EmitScalarInit(Init, /*decl*/ 0, LV, false); + CGF.EmitScalarInit(Init, /*decl*/ nullptr, LV, false); break; case TEK_Complex: CGF.EmitComplexExprIntoLValue(Init, LV, /*isInit*/ true); @@ -549,10 +549,8 @@ static void EmitMemberInitializer(CodeGenFunction &CGF, // If we are initializing an anonymous union field, drill down to // the field. IndirectFieldDecl *IndirectField = MemberInit->getIndirectMember(); - IndirectFieldDecl::chain_iterator I = IndirectField->chain_begin(), - IEnd = IndirectField->chain_end(); - for ( ; I != IEnd; ++I) - LHS = CGF.EmitLValueForFieldInitialization(LHS, cast<FieldDecl>(*I)); + for (const auto *I : IndirectField->chain()) + LHS = CGF.EmitLValueForFieldInitialization(LHS, cast<FieldDecl>(I)); FieldType = MemberInit->getIndirectMember()->getAnonField()->getType(); } else { LHS = CGF.EmitLValueForFieldInitialization(LHS, Field); @@ -609,7 +607,7 @@ void CodeGenFunction::EmitInitializerForField(FieldDecl *Field, EmitComplexExprIntoLValue(Init, LHS, /*isInit*/ true); break; case TEK_Aggregate: { - llvm::Value *ArrayIndexVar = 0; + llvm::Value *ArrayIndexVar = nullptr; if (ArrayIndexes.size()) { llvm::Type *SizeTy = ConvertType(getContext().getSizeType()); @@ -699,6 +697,10 @@ void CodeGenFunction::EmitConstructorBody(FunctionArgList &Args) { const CXXConstructorDecl *Ctor = cast<CXXConstructorDecl>(CurGD.getDecl()); CXXCtorType CtorType = CurGD.getCtorType(); + assert((CGM.getTarget().getCXXABI().hasConstructorVariants() || + CtorType == Ctor_Complete) && + "can only generate complete ctor for this ABI"); + // Before we go any further, try the complete->base constructor // delegation optimization. 
if (CtorType == Ctor_Complete && IsConstructorDelegationValid(Ctor) && @@ -717,6 +719,9 @@ void CodeGenFunction::EmitConstructorBody(FunctionArgList &Args) { if (IsTryBody) EnterCXXTryStmt(*cast<CXXTryStmt>(Body), true); + RegionCounter Cnt = getPGORegionCounter(Body); + Cnt.beginRegion(Builder); + RunCleanupsScope RunCleanups(*this); // TODO: in restricted cases, we can emit the vbase initializers of @@ -772,8 +777,8 @@ namespace { const VarDecl *SrcRec) : CGF(CGF), ClassDecl(ClassDecl), SrcRec(SrcRec), RecLayout(CGF.getContext().getASTRecordLayout(ClassDecl)), - FirstField(0), LastField(0), FirstFieldOffset(0), LastFieldOffset(0), - LastAddedFieldIndex(0) { } + FirstField(nullptr), LastField(nullptr), FirstFieldOffset(0), + LastFieldOffset(0), LastAddedFieldIndex(0) {} static bool isMemcpyableField(FieldDecl *F) { Qualifiers Qual = F->getType().getQualifiers(); @@ -783,7 +788,7 @@ namespace { } void addMemcpyableField(FieldDecl *F) { - if (FirstField == 0) + if (!FirstField) addInitialField(F); else addNextField(F); @@ -805,7 +810,7 @@ namespace { void emitMemcpy() { // Give the subclass a chance to bail out if it feels the memcpy isn't // worth it (e.g. Hasn't aggregated enough data). - if (FirstField == 0) { + if (!FirstField) { return; } @@ -839,7 +844,7 @@ namespace { } void reset() { - FirstField = 0; + FirstField = nullptr; } protected: @@ -912,7 +917,7 @@ namespace { FunctionArgList &Args) { if (CD->isCopyOrMoveConstructor() && CD->isDefaulted()) return Args[Args.size() - 1]; - return 0; + return nullptr; } // Returns true if a CXXCtorInitializer represents a member initialization @@ -921,7 +926,7 @@ namespace { if (!MemcpyableCtor) return false; FieldDecl *Field = MemberInit->getMember(); - assert(Field != 0 && "No field for member init."); + assert(Field && "No field for member init."); QualType FieldType = Field->getType(); CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(MemberInit->getInit()); @@ -1009,71 +1014,71 @@ namespace { // exists. Otherwise returns null. FieldDecl *getMemcpyableField(Stmt *S) { if (!AssignmentsMemcpyable) - return 0; + return nullptr; if (BinaryOperator *BO = dyn_cast<BinaryOperator>(S)) { // Recognise trivial assignments. 
if (BO->getOpcode() != BO_Assign) - return 0; + return nullptr; MemberExpr *ME = dyn_cast<MemberExpr>(BO->getLHS()); if (!ME) - return 0; + return nullptr; FieldDecl *Field = dyn_cast<FieldDecl>(ME->getMemberDecl()); if (!Field || !isMemcpyableField(Field)) - return 0; + return nullptr; Stmt *RHS = BO->getRHS(); if (ImplicitCastExpr *EC = dyn_cast<ImplicitCastExpr>(RHS)) RHS = EC->getSubExpr(); if (!RHS) - return 0; + return nullptr; MemberExpr *ME2 = dyn_cast<MemberExpr>(RHS); if (dyn_cast<FieldDecl>(ME2->getMemberDecl()) != Field) - return 0; + return nullptr; return Field; } else if (CXXMemberCallExpr *MCE = dyn_cast<CXXMemberCallExpr>(S)) { CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(MCE->getCalleeDecl()); if (!(MD && (MD->isCopyAssignmentOperator() || MD->isMoveAssignmentOperator()) && MD->isTrivial())) - return 0; + return nullptr; MemberExpr *IOA = dyn_cast<MemberExpr>(MCE->getImplicitObjectArgument()); if (!IOA) - return 0; + return nullptr; FieldDecl *Field = dyn_cast<FieldDecl>(IOA->getMemberDecl()); if (!Field || !isMemcpyableField(Field)) - return 0; + return nullptr; MemberExpr *Arg0 = dyn_cast<MemberExpr>(MCE->getArg(0)); if (!Arg0 || Field != dyn_cast<FieldDecl>(Arg0->getMemberDecl())) - return 0; + return nullptr; return Field; } else if (CallExpr *CE = dyn_cast<CallExpr>(S)) { FunctionDecl *FD = dyn_cast<FunctionDecl>(CE->getCalleeDecl()); if (!FD || FD->getBuiltinID() != Builtin::BI__builtin_memcpy) - return 0; + return nullptr; Expr *DstPtr = CE->getArg(0); if (ImplicitCastExpr *DC = dyn_cast<ImplicitCastExpr>(DstPtr)) DstPtr = DC->getSubExpr(); UnaryOperator *DUO = dyn_cast<UnaryOperator>(DstPtr); if (!DUO || DUO->getOpcode() != UO_AddrOf) - return 0; + return nullptr; MemberExpr *ME = dyn_cast<MemberExpr>(DUO->getSubExpr()); if (!ME) - return 0; + return nullptr; FieldDecl *Field = dyn_cast<FieldDecl>(ME->getMemberDecl()); if (!Field || !isMemcpyableField(Field)) - return 0; + return nullptr; Expr *SrcPtr = CE->getArg(1); if (ImplicitCastExpr *SC = dyn_cast<ImplicitCastExpr>(SrcPtr)) SrcPtr = SC->getSubExpr(); UnaryOperator *SUO = dyn_cast<UnaryOperator>(SrcPtr); if (!SUO || SUO->getOpcode() != UO_AddrOf) - return 0; + return nullptr; MemberExpr *ME2 = dyn_cast<MemberExpr>(SUO->getSubExpr()); if (!ME2 || Field != dyn_cast<FieldDecl>(ME2->getMemberDecl())) - return 0; + return nullptr; return Field; } - return 0; + return nullptr; } bool AssignmentsMemcpyable; @@ -1132,7 +1137,7 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD, CXXConstructorDecl::init_const_iterator B = CD->init_begin(), E = CD->init_end(); - llvm::BasicBlock *BaseCtorContinueBB = 0; + llvm::BasicBlock *BaseCtorContinueBB = nullptr; if (ClassDecl->getNumVBases() && !CGM.getTarget().getCXXABI().hasConstructorVariants()) { // The ABIs that don't have constructor variants need to put a branch @@ -1190,23 +1195,17 @@ HasTrivialDestructorBody(ASTContext &Context, return false; // Check fields. - for (CXXRecordDecl::field_iterator I = BaseClassDecl->field_begin(), - E = BaseClassDecl->field_end(); I != E; ++I) { - const FieldDecl *Field = *I; - + for (const auto *Field : BaseClassDecl->fields()) if (!FieldHasTrivialDestructorBody(Context, Field)) return false; - } // Check non-virtual bases. 
- for (CXXRecordDecl::base_class_const_iterator I = - BaseClassDecl->bases_begin(), E = BaseClassDecl->bases_end(); - I != E; ++I) { - if (I->isVirtual()) + for (const auto &I : BaseClassDecl->bases()) { + if (I.isVirtual()) continue; const CXXRecordDecl *NonVirtualBase = - cast<CXXRecordDecl>(I->getType()->castAs<RecordType>()->getDecl()); + cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); if (!HasTrivialDestructorBody(Context, NonVirtualBase, MostDerivedClassDecl)) return false; @@ -1214,11 +1213,9 @@ HasTrivialDestructorBody(ASTContext &Context, if (BaseClassDecl == MostDerivedClassDecl) { // Check virtual bases. - for (CXXRecordDecl::base_class_const_iterator I = - BaseClassDecl->vbases_begin(), E = BaseClassDecl->vbases_end(); - I != E; ++I) { + for (const auto &I : BaseClassDecl->vbases()) { const CXXRecordDecl *VirtualBase = - cast<CXXRecordDecl>(I->getType()->castAs<RecordType>()->getDecl()); + cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); if (!HasTrivialDestructorBody(Context, VirtualBase, MostDerivedClassDecl)) return false; @@ -1251,13 +1248,9 @@ static bool CanSkipVTablePointerInitialization(ASTContext &Context, // Check the fields. const CXXRecordDecl *ClassDecl = Dtor->getParent(); - for (CXXRecordDecl::field_iterator I = ClassDecl->field_begin(), - E = ClassDecl->field_end(); I != E; ++I) { - const FieldDecl *Field = *I; - + for (const auto *Field : ClassDecl->fields()) if (!FieldHasTrivialDestructorBody(Context, Field)) return false; - } return true; } @@ -1315,6 +1308,9 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { case Dtor_Base: assert(Body); + RegionCounter Cnt = getPGORegionCounter(Body); + Cnt.beginRegion(Builder); + // Enter the cleanup scopes for fields and non-virtual bases. EnterDtorCleanups(Dtor, Dtor_Base); @@ -1355,11 +1351,8 @@ void CodeGenFunction::emitImplicitAssignmentOperatorBody(FunctionArgList &Args) LexicalScope Scope(*this, RootCS->getSourceRange()); AssignmentMemcpyizer AM(*this, AssignOp, Args); - for (CompoundStmt::const_body_iterator I = RootCS->body_begin(), - E = RootCS->body_end(); - I != E; ++I) { - AM.emitAssignment(*I); - } + for (auto *I : RootCS->body()) + AM.emitAssignment(I); AM.finish(); } @@ -1368,7 +1361,7 @@ namespace { struct CallDtorDelete : EHScopeStack::Cleanup { CallDtorDelete() {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { const CXXDestructorDecl *Dtor = cast<CXXDestructorDecl>(CGF.CurCodeDecl); const CXXRecordDecl *ClassDecl = Dtor->getParent(); CGF.EmitDeleteCall(Dtor->getOperatorDelete(), CGF.LoadCXXThis(), @@ -1381,10 +1374,10 @@ namespace { public: CallDtorDeleteConditional(llvm::Value *ShouldDeleteCondition) : ShouldDeleteCondition(ShouldDeleteCondition) { - assert(ShouldDeleteCondition != NULL); + assert(ShouldDeleteCondition != nullptr); } - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { llvm::BasicBlock *callDeleteBB = CGF.createBasicBlock("dtor.call_delete"); llvm::BasicBlock *continueBB = CGF.createBasicBlock("dtor.continue"); llvm::Value *ShouldCallDelete @@ -1413,7 +1406,7 @@ namespace { : field(field), destroyer(destroyer), useEHCleanupForArray(useEHCleanupForArray) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { // Find the address of the field. 
llvm::Value *thisValue = CGF.LoadCXXThis(); QualType RecordTy = CGF.getContext().getTagDeclType(field->getParent()); @@ -1427,19 +1420,19 @@ namespace { }; } -/// EmitDtorEpilogue - Emit all code that comes at the end of class's +/// \brief Emit all code that comes at the end of class's /// destructor. This is to call destructors on members and base classes /// in reverse order of their construction. void CodeGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD, CXXDtorType DtorType) { - assert(!DD->isTrivial() && - "Should not emit dtor epilogue for trivial dtor!"); + assert((!DD->isTrivial() || DD->hasAttr<DLLExportAttr>()) && + "Should not emit dtor epilogue for non-exported trivial dtor!"); // The deleting-destructor phase just needs to call the appropriate // operator delete that Sema picked up. if (DtorType == Dtor_Deleting) { assert(DD->getOperatorDelete() && - "operator delete missing - EmitDtorEpilogue"); + "operator delete missing - EnterDtorCleanups"); if (CXXStructorImplicitParamValue) { // If there is an implicit param to the deleting dtor, it's a boolean // telling whether we should call delete at the end of the dtor. @@ -1462,10 +1455,7 @@ void CodeGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD, // We push them in the forward order so that they'll be popped in // the reverse order. - for (CXXRecordDecl::base_class_const_iterator I = - ClassDecl->vbases_begin(), E = ClassDecl->vbases_end(); - I != E; ++I) { - const CXXBaseSpecifier &Base = *I; + for (const auto &Base : ClassDecl->vbases()) { CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(Base.getType()->getAs<RecordType>()->getDecl()); @@ -1484,10 +1474,7 @@ void CodeGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD, assert(DtorType == Dtor_Base); // Destroy non-virtual bases. - for (CXXRecordDecl::base_class_const_iterator I = - ClassDecl->bases_begin(), E = ClassDecl->bases_end(); I != E; ++I) { - const CXXBaseSpecifier &Base = *I; - + for (const auto &Base : ClassDecl->bases()) { // Ignore virtual bases. if (Base.isVirtual()) continue; @@ -1504,11 +1491,8 @@ void CodeGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD, } // Destroy direct fields. - SmallVector<const FieldDecl *, 16> FieldDecls; - for (CXXRecordDecl::field_iterator I = ClassDecl->field_begin(), - E = ClassDecl->field_end(); I != E; ++I) { - const FieldDecl *field = *I; - QualType type = field->getType(); + for (const auto *Field : ClassDecl->fields()) { + QualType type = Field->getType(); QualType::DestructionKind dtorKind = type.isDestructedType(); if (!dtorKind) continue; @@ -1517,7 +1501,7 @@ void CodeGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD, if (RT && RT->getDecl()->isAnonymousStructOrUnion()) continue; CleanupKind cleanupKind = getCleanupKind(dtorKind); - EHStack.pushCleanup<DestroyField>(cleanupKind, field, + EHStack.pushCleanup<DestroyField>(cleanupKind, Field, getDestroyer(dtorKind), cleanupKind & EHCleanup); } @@ -1568,7 +1552,7 @@ CodeGenFunction::EmitCXXAggrConstructorCall(const CXXConstructorDecl *ctor, // because of GCC extensions that permit zero-length arrays. There // are probably legitimate places where we could assume that this // doesn't happen, but it's not clear that it's worth it. - llvm::BranchInst *zeroCheckBranch = 0; + llvm::BranchInst *zeroCheckBranch = nullptr; // Optimize for a constant count. 
llvm::ConstantInt *constantCount @@ -1683,9 +1667,31 @@ CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, return; } - // Non-trivial constructors are handled in an ABI-specific manner. - CGM.getCXXABI().EmitConstructorCall(*this, D, Type, ForVirtualBase, - Delegating, This, ArgBeg, ArgEnd); + // C++11 [class.mfct.non-static]p2: + // If a non-static member function of a class X is called for an object that + // is not of type X, or of a type derived from X, the behavior is undefined. + // FIXME: Provide a source location here. + EmitTypeCheck(CodeGenFunction::TCK_ConstructorCall, SourceLocation(), This, + getContext().getRecordType(D->getParent())); + + CallArgList Args; + + // Push the this ptr. + Args.add(RValue::get(This), D->getThisType(getContext())); + + // Add the rest of the user-supplied arguments. + const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>(); + EmitCallArgs(Args, FPT, ArgBeg, ArgEnd); + + // Insert any ABI-specific implicit constructor arguments. + unsigned ExtraArgs = CGM.getCXXABI().addImplicitConstructorArgs( + *this, D, Type, ForVirtualBase, Delegating, Args); + + // Emit the call. + llvm::Value *Callee = CGM.GetAddrOfCXXConstructor(D, Type); + const CGFunctionInfo &Info = + CGM.getTypes().arrangeCXXConstructorCall(Args, D, Type, ExtraArgs); + EmitCall(Info, Callee, ReturnValueSlot(), Args, D); } void @@ -1704,38 +1710,23 @@ CodeGenFunction::EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D, assert(D->isInstance() && "Trying to emit a member call expr on a static method!"); - const FunctionProtoType *FPT = D->getType()->getAs<FunctionProtoType>(); + const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>(); CallArgList Args; // Push the this ptr. Args.add(RValue::get(This), D->getThisType(getContext())); - // Push the src ptr. - QualType QT = *(FPT->arg_type_begin()); + QualType QT = *(FPT->param_type_begin()); llvm::Type *t = CGM.getTypes().ConvertType(QT); Src = Builder.CreateBitCast(Src, t); Args.add(RValue::get(Src), QT); - + // Skip over first argument (Src). - ++ArgBeg; - CallExpr::const_arg_iterator Arg = ArgBeg; - for (FunctionProtoType::arg_type_iterator I = FPT->arg_type_begin()+1, - E = FPT->arg_type_end(); I != E; ++I, ++Arg) { - assert(Arg != ArgEnd && "Running over edge of argument list!"); - EmitCallArg(Args, *Arg, *I); - } - // Either we've emitted all the call args, or we have a call to a - // variadic function. - assert((Arg == ArgEnd || FPT->isVariadic()) && - "Extra arguments in non-variadic function!"); - // If we still have any arguments, emit them using the type of the argument. 
- for (; Arg != ArgEnd; ++Arg) { - QualType ArgType = Arg->getType(); - EmitCallArg(Args, *Arg, ArgType); - } - + EmitCallArgs(Args, FPT->isVariadic(), FPT->param_type_begin() + 1, + FPT->param_type_end(), ArgBeg + 1, ArgEnd); + EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, RequiredArgs::All), Callee, ReturnValueSlot(), Args, D); } @@ -1790,7 +1781,7 @@ namespace { CXXDtorType Type) : Dtor(D), Addr(Addr), Type(Type) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { CGF.EmitCXXDestructorCall(Dtor, Type, /*ForVirtualBase=*/false, /*Delegating=*/true, Addr); } @@ -1830,23 +1821,8 @@ void CodeGenFunction::EmitCXXDestructorCall(const CXXDestructorDecl *DD, bool ForVirtualBase, bool Delegating, llvm::Value *This) { - GlobalDecl GD(DD, Type); - llvm::Value *VTT = GetVTTParameter(GD, ForVirtualBase, Delegating); - llvm::Value *Callee = 0; - if (getLangOpts().AppleKext) - Callee = BuildAppleKextVirtualDestructorCall(DD, Type, - DD->getParent()); - - if (!Callee) - Callee = CGM.GetAddrOfCXXDestructor(DD, Type); - - if (DD->isVirtual()) - This = CGM.getCXXABI().adjustThisArgumentForVirtualCall(*this, GD, This); - - // FIXME: Provide a source location here. - EmitCXXMemberCall(DD, SourceLocation(), Callee, ReturnValueSlot(), This, - VTT, getContext().getPointerType(getContext().VoidPtrTy), - 0, 0); + CGM.getCXXABI().EmitDestructorCall(*this, DD, Type, ForVirtualBase, + Delegating, This); } namespace { @@ -1857,7 +1833,7 @@ namespace { CallLocalDtor(const CXXDestructorDecl *D, llvm::Value *Addr) : Dtor(D), Addr(Addr) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false, /*Delegating=*/false, Addr); @@ -1894,7 +1870,7 @@ CodeGenFunction::InitializeVTablePointer(BaseSubobject Base, return; // Compute where to store the address point. - llvm::Value *VirtualOffset = 0; + llvm::Value *VirtualOffset = nullptr; CharUnits NonVirtualOffset = CharUnits::Zero(); if (NeedsVirtualOffset) { @@ -1944,10 +1920,9 @@ CodeGenFunction::InitializeVTablePointers(BaseSubobject Base, const CXXRecordDecl *RD = Base.getBase(); // Traverse bases. - for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(), - E = RD->bases_end(); I != E; ++I) { + for (const auto &I : RD->bases()) { CXXRecordDecl *BaseDecl - = cast<CXXRecordDecl>(I->getType()->getAs<RecordType>()->getDecl()); + = cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl()); // Ignore classes without a vtable. if (!BaseDecl->isDynamicClass()) @@ -1957,7 +1932,7 @@ CodeGenFunction::InitializeVTablePointers(BaseSubobject Base, CharUnits BaseOffsetFromNearestVBase; bool BaseDeclIsNonVirtualPrimaryBase; - if (I->isVirtual()) { + if (I.isVirtual()) { // Check if we've visited this virtual base before. if (!VBases.insert(BaseDecl)) continue; @@ -1978,7 +1953,7 @@ CodeGenFunction::InitializeVTablePointers(BaseSubobject Base, } InitializeVTablePointers(BaseSubobject(BaseDecl, BaseOffset), - I->isVirtual() ? BaseDecl : NearestVBase, + I.isVirtual() ? BaseDecl : NearestVBase, BaseOffsetFromNearestVBase, BaseDeclIsNonVirtualPrimaryBase, VTableClass, VBases); @@ -1993,7 +1968,7 @@ void CodeGenFunction::InitializeVTablePointers(const CXXRecordDecl *RD) { // Initialize the vtable pointers for this class and all of its bases. 
VisitedVirtualBasesSetTy VBases; InitializeVTablePointers(BaseSubobject(RD, CharUnits::Zero()), - /*NearestVBase=*/0, + /*NearestVBase=*/nullptr, /*OffsetFromNearestVBase=*/CharUnits::Zero(), /*BaseIsNonVirtualPrimaryBase=*/false, RD, VBases); @@ -2127,7 +2102,7 @@ void CodeGenFunction::EmitForwardingCallToLambda( // Prepare the return slot. const FunctionProtoType *FPT = callOperator->getType()->castAs<FunctionProtoType>(); - QualType resultType = FPT->getResultType(); + QualType resultType = FPT->getReturnType(); ReturnValueSlot returnSlot; if (!resultType->isVoidType() && calleeFnInfo.getReturnInfo().getKind() == ABIArgInfo::Indirect && @@ -2162,11 +2137,9 @@ void CodeGenFunction::EmitLambdaBlockInvokeBody() { CallArgs.add(RValue::get(ThisPtr), ThisType); // Add the rest of the parameters. - for (BlockDecl::param_const_iterator I = BD->param_begin(), - E = BD->param_end(); I != E; ++I) { - ParmVarDecl *param = *I; + for (auto param : BD->params()) EmitDelegateCallArg(CallArgs, param, param->getLocStart()); - } + assert(!Lambda->isGenericLambda() && "generic lambda interconversion to block not implemented"); EmitForwardingCallToLambda(Lambda->getLambdaCallOperator(), CallArgs); @@ -2194,11 +2167,9 @@ void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) { CallArgs.add(RValue::get(ThisPtr), ThisType); // Add the rest of the parameters. - for (FunctionDecl::param_const_iterator I = MD->param_begin(), - E = MD->param_end(); I != E; ++I) { - ParmVarDecl *param = *I; - EmitDelegateCallArg(CallArgs, param, param->getLocStart()); - } + for (auto Param : MD->params()) + EmitDelegateCallArg(CallArgs, Param, Param->getLocStart()); + const CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator(); // For a generic lambda, find the corresponding call operator specialization // to which the call to the static-invoker shall be forwarded. 
@@ -2206,9 +2177,9 @@ void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) { assert(MD->isFunctionTemplateSpecialization()); const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs(); FunctionTemplateDecl *CallOpTemplate = CallOp->getDescribedFunctionTemplate(); - void *InsertPos = 0; + void *InsertPos = nullptr; FunctionDecl *CorrespondingCallOpSpecialization = - CallOpTemplate->findSpecialization(TAL->data(), TAL->size(), InsertPos); + CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos); assert(CorrespondingCallOpSpecialization); CallOp = cast<CXXMethodDecl>(CorrespondingCallOpSpecialization); } diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp index 65de4d498d1a..ed9f96df7987 100644 --- a/lib/CodeGen/CGCleanup.cpp +++ b/lib/CodeGen/CGCleanup.cpp @@ -50,7 +50,7 @@ DominatingValue<RValue>::saved_type::save(CodeGenFunction &CGF, RValue rv) { CodeGenFunction::ComplexPairTy V = rv.getComplexVal(); llvm::Type *ComplexTy = llvm::StructType::get(V.first->getType(), V.second->getType(), - (void*) 0); + (void*) nullptr); llvm::Value *addr = CGF.CreateTempAlloca(ComplexTy, "saved-complex"); CGF.Builder.CreateStore(V.first, CGF.Builder.CreateStructGEP(addr, 0)); CGF.Builder.CreateStore(V.second, CGF.Builder.CreateStructGEP(addr, 1)); @@ -245,7 +245,7 @@ void EHScopeStack::popNullFixups() { assert(BranchFixups.size() >= MinSize && "fixup stack out of order"); while (BranchFixups.size() > MinSize && - BranchFixups.back().Destination == 0) + BranchFixups.back().Destination == nullptr) BranchFixups.pop_back(); } @@ -263,7 +263,7 @@ void CodeGenFunction::initFullExprCleanup() { // Set that as the active flag in the cleanup. EHCleanupScope &cleanup = cast<EHCleanupScope>(*EHStack.begin()); - assert(cleanup.getActiveFlag() == 0 && "cleanup already has active flag?"); + assert(!cleanup.getActiveFlag() && "cleanup already has active flag?"); cleanup.setActiveFlag(active); if (cleanup.isNormalCleanup()) cleanup.setTestFlagInNormalCleanup(); @@ -283,7 +283,7 @@ static void ResolveAllBranchFixups(CodeGenFunction &CGF, for (unsigned I = 0, E = CGF.EHStack.getNumBranchFixups(); I != E; ++I) { // Skip this fixup if its destination isn't set. BranchFixup &Fixup = CGF.EHStack.getBranchFixup(I); - if (Fixup.Destination == 0) continue; + if (Fixup.Destination == nullptr) continue; // If there isn't an OptimisticBranchBlock, then InitialBranch is // still pointing directly to its destination; forward it to the @@ -293,7 +293,7 @@ static void ResolveAllBranchFixups(CodeGenFunction &CGF, // lbl: // i.e. where there's an unresolved fixup inside a single cleanup // entry which we're currently popping. - if (Fixup.OptimisticBranchBlock == 0) { + if (Fixup.OptimisticBranchBlock == nullptr) { new llvm::StoreInst(CGF.Builder.getInt32(Fixup.DestinationIndex), CGF.getNormalCleanupDestSlot(), Fixup.InitialBranch); @@ -347,7 +347,7 @@ void CodeGenFunction::ResolveBranchFixups(llvm::BasicBlock *Block) { BranchFixup &Fixup = EHStack.getBranchFixup(I); if (Fixup.Destination != Block) continue; - Fixup.Destination = 0; + Fixup.Destination = nullptr; ResolvedAny = true; // If it doesn't have an optimistic branch block, LatestBranch is @@ -473,7 +473,7 @@ static void EmitCleanup(CodeGenFunction &CGF, // If there's an active flag, load it and skip the cleanup if it's // false. 
- llvm::BasicBlock *ContBB = 0; + llvm::BasicBlock *ContBB = nullptr; if (ActiveFlag) { ContBB = CGF.createBasicBlock("cleanup.done"); llvm::BasicBlock *CleanupBB = CGF.createBasicBlock("cleanup.action"); @@ -528,7 +528,7 @@ static void destroyOptimisticNormalEntry(CodeGenFunction &CGF, llvm::BasicBlock *unreachableBB = CGF.getUnreachableBlock(); for (llvm::BasicBlock::use_iterator i = entry->use_begin(), e = entry->use_end(); i != e; ) { - llvm::Use &use = i.getUse(); + llvm::Use &use = *i; ++i; use.set(unreachableBB); @@ -568,15 +568,15 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { // Remember activation information. bool IsActive = Scope.isActive(); llvm::Value *NormalActiveFlag = - Scope.shouldTestFlagInNormalCleanup() ? Scope.getActiveFlag() : 0; + Scope.shouldTestFlagInNormalCleanup() ? Scope.getActiveFlag() : nullptr; llvm::Value *EHActiveFlag = - Scope.shouldTestFlagInEHCleanup() ? Scope.getActiveFlag() : 0; + Scope.shouldTestFlagInEHCleanup() ? Scope.getActiveFlag() : nullptr; // Check whether we need an EH cleanup. This is only true if we've // generated a lazy EH cleanup block. llvm::BasicBlock *EHEntry = Scope.getCachedEHDispatchBlock(); - assert(Scope.hasEHBranches() == (EHEntry != 0)); - bool RequiresEHCleanup = (EHEntry != 0); + assert(Scope.hasEHBranches() == (EHEntry != nullptr)); + bool RequiresEHCleanup = (EHEntry != nullptr); EHScopeStack::stable_iterator EHParent = Scope.getEnclosingEHScope(); // Check the three conditions which might require a normal cleanup: @@ -590,7 +590,7 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { // - whether there's a fallthrough llvm::BasicBlock *FallthroughSource = Builder.GetInsertBlock(); - bool HasFallthrough = (FallthroughSource != 0 && IsActive); + bool HasFallthrough = (FallthroughSource != nullptr && IsActive); // Branch-through fall-throughs leave the insertion point set to the // end of the last cleanup, which points to the current scope. The @@ -720,7 +720,7 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { // - if fall-through is a branch-through // - if there are fixups that will be optimistically forwarded // to the enclosing cleanup - llvm::BasicBlock *BranchThroughDest = 0; + llvm::BasicBlock *BranchThroughDest = nullptr; if (Scope.hasBranchThroughs() || (FallthroughSource && FallthroughIsBranchThrough) || (HasFixups && HasEnclosingCleanups)) { @@ -729,7 +729,7 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { BranchThroughDest = CreateNormalEntry(*this, cast<EHCleanupScope>(S)); } - llvm::BasicBlock *FallthroughDest = 0; + llvm::BasicBlock *FallthroughDest = nullptr; SmallVector<llvm::Instruction*, 2> InstsToAppend; // If there's exactly one branch-after and no other threads, @@ -860,7 +860,9 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { // Emit the EH cleanup if required. 
if (RequiresEHCleanup) { - if (CGDebugInfo *DI = getDebugInfo()) + CGDebugInfo *DI = getDebugInfo(); + SaveAndRestoreLocation AutoRestoreLocation(*this, Builder); + if (DI) DI->EmitLocation(Builder, CurEHLocation); CGBuilderTy::InsertPoint SavedIP = Builder.saveAndClearIP(); @@ -941,7 +943,7 @@ void CodeGenFunction::EmitBranchThroughCleanup(JumpDest Dest) { Fixup.Destination = Dest.getBlock(); Fixup.DestinationIndex = Dest.getDestIndex(); Fixup.InitialBranch = BI; - Fixup.OptimisticBranchBlock = 0; + Fixup.OptimisticBranchBlock = nullptr; Builder.ClearInsertionPoint(); return; diff --git a/lib/CodeGen/CGCleanup.h b/lib/CodeGen/CGCleanup.h index 1bd6bba523fb..1d4606f13669 100644 --- a/lib/CodeGen/CGCleanup.h +++ b/lib/CodeGen/CGCleanup.h @@ -96,7 +96,7 @@ public: enum Kind { Cleanup, Catch, Terminate, Filter }; EHScope(Kind kind, EHScopeStack::stable_iterator enclosingEHScope) - : CachedLandingPad(0), CachedEHDispatchBlock(0), + : CachedLandingPad(nullptr), CachedEHDispatchBlock(nullptr), EnclosingEHScope(enclosingEHScope) { CommonBits.Kind = kind; } @@ -145,12 +145,12 @@ public: struct Handler { /// A type info value, or null (C++ null, not an LLVM null pointer) /// for a catch-all. - llvm::Value *Type; + llvm::Constant *Type; /// The catch handler for this type. llvm::BasicBlock *Block; - bool isCatchAll() const { return Type == 0; } + bool isCatchAll() const { return Type == nullptr; } }; private: @@ -180,10 +180,10 @@ public: } void setCatchAllHandler(unsigned I, llvm::BasicBlock *Block) { - setHandler(I, /*catchall*/ 0, Block); + setHandler(I, /*catchall*/ nullptr, Block); } - void setHandler(unsigned I, llvm::Value *Type, llvm::BasicBlock *Block) { + void setHandler(unsigned I, llvm::Constant *Type, llvm::BasicBlock *Block) { assert(I < getNumHandlers()); getHandlers()[I].Type = Type; getHandlers()[I].Block = Block; @@ -194,6 +194,15 @@ public: return getHandlers()[I]; } + // Clear all handler blocks. + // FIXME: it's better to always call clearHandlerBlocks in DTOR and have a + // 'takeHandler' or some such function which removes ownership from the + // EHCatchScope object if the handlers should live longer than EHCatchScope. + void clearHandlerBlocks() { + for (unsigned I = 0, N = getNumHandlers(); I != N; ++I) + delete getHandler(I).Block; + } + typedef const Handler *iterator; iterator begin() const { return getHandlers(); } iterator end() const { return getHandlers() + getNumHandlers(); } @@ -259,7 +268,7 @@ public: EHScopeStack::stable_iterator enclosingNormal, EHScopeStack::stable_iterator enclosingEH) : EHScope(EHScope::Cleanup, enclosingEH), EnclosingNormal(enclosingNormal), - NormalBlock(0), ActiveFlag(0), ExtInfo(0) { + NormalBlock(nullptr), ActiveFlag(nullptr), ExtInfo(nullptr) { CleanupBits.IsNormalCleanup = isNormal; CleanupBits.IsEHCleanup = isEH; CleanupBits.IsActive = isActive; @@ -446,7 +455,7 @@ class EHScopeStack::iterator { explicit iterator(char *Ptr) : Ptr(Ptr) {} public: - iterator() : Ptr(0) {} + iterator() : Ptr(nullptr) {} EHScope *get() const { return reinterpret_cast<EHScope*>(Ptr); diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp index fcb26f0da615..048c8f8f3674 100644 --- a/lib/CodeGen/CGDebugInfo.cpp +++ b/lib/CodeGen/CGDebugInfo.cpp @@ -37,7 +37,7 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/Path.h"
+#include "llvm/Support/Path.h" using namespace clang; using namespace clang::CodeGen; @@ -52,30 +52,35 @@ CGDebugInfo::~CGDebugInfo() { "Region stack mismatch, stack not empty!"); } - -NoLocation::NoLocation(CodeGenFunction &CGF, CGBuilderTy &B) - : DI(CGF.getDebugInfo()), Builder(B) { +SaveAndRestoreLocation::SaveAndRestoreLocation(CodeGenFunction &CGF, + CGBuilderTy &B) + : DI(CGF.getDebugInfo()), Builder(B) { if (DI) { SavedLoc = DI->getLocation(); DI->CurLoc = SourceLocation(); - Builder.SetCurrentDebugLocation(llvm::DebugLoc()); } } +SaveAndRestoreLocation::~SaveAndRestoreLocation() { + if (DI) + DI->EmitLocation(Builder, SavedLoc); +} + +NoLocation::NoLocation(CodeGenFunction &CGF, CGBuilderTy &B) + : SaveAndRestoreLocation(CGF, B) { + if (DI) + Builder.SetCurrentDebugLocation(llvm::DebugLoc()); +} + NoLocation::~NoLocation() { - if (DI) { + if (DI) assert(Builder.getCurrentDebugLocation().isUnknown()); - DI->CurLoc = SavedLoc; - } } ArtificialLocation::ArtificialLocation(CodeGenFunction &CGF, CGBuilderTy &B) - : DI(CGF.getDebugInfo()), Builder(B) { - if (DI) { - SavedLoc = DI->getLocation(); - DI->CurLoc = SourceLocation(); + : SaveAndRestoreLocation(CGF, B) { + if (DI) Builder.SetCurrentDebugLocation(llvm::DebugLoc()); - } } void ArtificialLocation::Emit() { @@ -91,10 +96,8 @@ void ArtificialLocation::Emit() { } ArtificialLocation::~ArtificialLocation() { - if (DI) { + if (DI) assert(Builder.getCurrentDebugLocation().getLine() == 0); - DI->CurLoc = SavedLoc; - } } void CGDebugInfo::setLocation(SourceLocation Loc) { @@ -109,17 +112,14 @@ void CGDebugInfo::setLocation(SourceLocation Loc) { if (LexicalBlockStack.empty()) return; SourceManager &SM = CGM.getContext().getSourceManager(); + llvm::DIScope Scope(LexicalBlockStack.back()); PresumedLoc PCLoc = SM.getPresumedLoc(CurLoc); - PresumedLoc PPLoc = SM.getPresumedLoc(PrevLoc); - if (PCLoc.isInvalid() || PPLoc.isInvalid() || - !strcmp(PPLoc.getFilename(), PCLoc.getFilename())) + if (PCLoc.isInvalid() || Scope.getFilename() == PCLoc.getFilename()) return; - llvm::MDNode *LB = LexicalBlockStack.back(); - llvm::DIScope Scope = llvm::DIScope(LB); if (Scope.isLexicalBlockFile()) { - llvm::DILexicalBlockFile LBF = llvm::DILexicalBlockFile(LB); + llvm::DILexicalBlockFile LBF = llvm::DILexicalBlockFile(Scope); llvm::DIDescriptor D = DBuilder.createLexicalBlockFile(LBF.getScope(), getOrCreateFile(CurLoc)); @@ -225,34 +225,20 @@ StringRef CGDebugInfo::getSelectorName(Selector S) { /// getClassName - Get class name including template argument list. 
StringRef CGDebugInfo::getClassName(const RecordDecl *RD) { - const ClassTemplateSpecializationDecl *Spec - = dyn_cast<ClassTemplateSpecializationDecl>(RD); - if (!Spec) + // quick optimization to avoid having to intern strings that are already + // stored reliably elsewhere + if (!isa<ClassTemplateSpecializationDecl>(RD)) return RD->getName(); - const TemplateArgument *Args; - unsigned NumArgs; - if (TypeSourceInfo *TAW = Spec->getTypeAsWritten()) { - const TemplateSpecializationType *TST = - cast<TemplateSpecializationType>(TAW->getType()); - Args = TST->getArgs(); - NumArgs = TST->getNumArgs(); - } else { - const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs(); - Args = TemplateArgs.data(); - NumArgs = TemplateArgs.size(); - } - StringRef Name = RD->getIdentifier()->getName(); - PrintingPolicy Policy(CGM.getLangOpts()); - SmallString<128> TemplateArgList; + SmallString<128> Name; { - llvm::raw_svector_ostream OS(TemplateArgList); - TemplateSpecializationType::PrintTemplateArgumentList(OS, Args, NumArgs, - Policy); + llvm::raw_svector_ostream OS(Name); + RD->getNameForDiagnostic(OS, CGM.getContext().getPrintingPolicy(), + /*Qualified*/ false); } // Copy this name on the side and use its reference. - return internString(Name, TemplateArgList); + return internString(Name); } /// getOrCreateFile - Get the file debug info descriptor for the input location. @@ -328,11 +314,18 @@ StringRef CGDebugInfo::getCurrentDirname() { /// CreateCompileUnit - Create new compile unit. void CGDebugInfo::CreateCompileUnit() { + // Should we be asking the SourceManager for the main file name, instead of + // accepting it as an argument? This just causes the main file name to + // mismatch with source locations and create extra lexical scopes or + // mismatched debug info (a CU with a DW_AT_file of "-", because that's what + // the driver passed, but functions/other things have DW_AT_file of "<stdin>" + // because that's what the SourceManager says) + // Get absolute path name. SourceManager &SM = CGM.getContext().getSourceManager(); std::string MainFileName = CGM.getCodeGenOpts().MainFileName; if (MainFileName.empty()) - MainFileName = "<unknown>"; + MainFileName = "<stdin>"; // The main file name provided via the "-main-file-name" option contains just // the file name itself with no path information. This file name may have had @@ -342,9 +335,9 @@ void CGDebugInfo::CreateCompileUnit() { if (const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID())) { MainFileDir = MainFile->getDir()->getName(); if (MainFileDir != ".") { - llvm::SmallString<1024> MainFileDirSS(MainFileDir);
- llvm::sys::path::append(MainFileDirSS, MainFileName);
- MainFileName = MainFileDirSS.str();
+ llvm::SmallString<1024> MainFileDirSS(MainFileDir); + llvm::sys::path::append(MainFileDirSS, MainFileName); + MainFileName = MainFileDirSS.str(); } } @@ -355,7 +348,7 @@ void CGDebugInfo::CreateCompileUnit() { std::string SplitDwarfFile = CGM.getCodeGenOpts().SplitDwarfFile; StringRef SplitDwarfFilename = internString(SplitDwarfFile); - unsigned LangTag; + llvm::dwarf::SourceLanguage LangTag; const LangOptions &LO = CGM.getLangOpts(); if (LO.CPlusPlus) { if (LO.ObjC1) @@ -379,16 +372,19 @@ void CGDebugInfo::CreateCompileUnit() { // Create new compile unit. // FIXME - Eliminate TheCU. - TheCU = DBuilder.createCompileUnit(LangTag, Filename, getCurrentDirname(), - Producer, LO.Optimize, - CGM.getCodeGenOpts().DwarfDebugFlags, - RuntimeVers, SplitDwarfFilename); + TheCU = DBuilder.createCompileUnit( + LangTag, Filename, getCurrentDirname(), Producer, LO.Optimize, + CGM.getCodeGenOpts().DwarfDebugFlags, RuntimeVers, SplitDwarfFilename, + DebugKind <= CodeGenOptions::DebugLineTablesOnly + ? llvm::DIBuilder::LineTablesOnly + : llvm::DIBuilder::FullDebug, + DebugKind != CodeGenOptions::LocTrackingOnly); } /// CreateType - Get the Basic type from the cache or create a new /// one if necessary. llvm::DIType CGDebugInfo::CreateType(const BuiltinType *BT) { - unsigned Encoding = 0; + llvm::dwarf::TypeKind Encoding; StringRef BTName; switch (BT->getKind()) { #define BUILTIN_TYPE(Id, SingletonId) @@ -402,11 +398,10 @@ llvm::DIType CGDebugInfo::CreateType(const BuiltinType *BT) { case BuiltinType::Void: return llvm::DIType(); case BuiltinType::ObjCClass: - if (ClassTy) - return ClassTy; - ClassTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, - "objc_class", TheCU, - getOrCreateMainFile(), 0); + if (!ClassTy) + ClassTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, + "objc_class", TheCU, + getOrCreateMainFile(), 0); return ClassTy; case BuiltinType::ObjCId: { // typedef struct objc_class *Class; @@ -435,12 +430,10 @@ llvm::DIType CGDebugInfo::CreateType(const BuiltinType *BT) { return ObjTy; } case BuiltinType::ObjCSel: { - if (SelTy) - return SelTy; - SelTy = - DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, - "objc_selector", TheCU, getOrCreateMainFile(), - 0); + if (!SelTy) + SelTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, + "objc_selector", TheCU, + getOrCreateMainFile(), 0); return SelTy; } @@ -515,7 +508,7 @@ llvm::DIType CGDebugInfo::CreateType(const BuiltinType *BT) { llvm::DIType CGDebugInfo::CreateType(const ComplexType *Ty) { // Bit size, align and offset of the type. - unsigned Encoding = llvm::dwarf::DW_ATE_complex_float; + llvm::dwarf::TypeKind Encoding = llvm::dwarf::DW_ATE_complex_float; if (Ty->isComplexIntegerType()) Encoding = llvm::dwarf::DW_ATE_lo_user; @@ -540,7 +533,7 @@ llvm::DIType CGDebugInfo::CreateQualifiedType(QualType Ty, llvm::DIFile Unit) { // We will create one Derived type for one qualifier and recurse to handle any // additional ones. - unsigned Tag; + llvm::dwarf::Tag Tag; if (Qc.hasConst()) { Tag = llvm::dwarf::DW_TAG_const_type; Qc.removeConst(); @@ -620,7 +613,7 @@ CGDebugInfo::getOrCreateRecordFwdDecl(const RecordType *Ty, unsigned Line = getLineNumber(RD->getLocation()); StringRef RDName = getClassName(RD); - unsigned Tag = 0; + llvm::dwarf::Tag Tag; if (RD->isStruct() || RD->isInterface()) Tag = llvm::dwarf::DW_TAG_structure_type; else if (RD->isUnion()) @@ -632,11 +625,13 @@ CGDebugInfo::getOrCreateRecordFwdDecl(const RecordType *Ty, // Create the type. 
SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU); - return DBuilder.createForwardDecl(Tag, RDName, Ctx, DefUnit, Line, 0, 0, 0, - FullName); + llvm::DICompositeType RetTy = DBuilder.createReplaceableForwardDecl( + Tag, RDName, Ctx, DefUnit, Line, 0, 0, 0, FullName); + ReplaceMap.push_back(std::make_pair(Ty, static_cast<llvm::Value *>(RetTy))); + return RetTy; } -llvm::DIType CGDebugInfo::CreatePointerLikeType(unsigned Tag, +llvm::DIType CGDebugInfo::CreatePointerLikeType(llvm::dwarf::Tag Tag, const Type *Ty, QualType PointeeTy, llvm::DIFile Unit) { @@ -728,22 +723,47 @@ llvm::DIType CGDebugInfo::CreateType(const BlockPointerType *Ty, return BlockLiteralGeneric; } +llvm::DIType CGDebugInfo::CreateType(const TemplateSpecializationType *Ty, llvm::DIFile Unit) { + assert(Ty->isTypeAlias()); + llvm::DIType Src = getOrCreateType(Ty->getAliasedType(), Unit); + + SmallString<128> NS; + llvm::raw_svector_ostream OS(NS); + Ty->getTemplateName().print(OS, CGM.getContext().getPrintingPolicy(), /*qualified*/ false); + + TemplateSpecializationType::PrintTemplateArgumentList( + OS, Ty->getArgs(), Ty->getNumArgs(), + CGM.getContext().getPrintingPolicy()); + + TypeAliasDecl *AliasDecl = + cast<TypeAliasTemplateDecl>(Ty->getTemplateName().getAsTemplateDecl()) + ->getTemplatedDecl(); + + SourceLocation Loc = AliasDecl->getLocation(); + llvm::DIFile File = getOrCreateFile(Loc); + unsigned Line = getLineNumber(Loc); + + llvm::DIDescriptor Ctxt = getContextDescriptor(cast<Decl>(AliasDecl->getDeclContext())); + + return DBuilder.createTypedef(Src, internString(OS.str()), File, Line, Ctxt); +} + llvm::DIType CGDebugInfo::CreateType(const TypedefType *Ty, llvm::DIFile Unit) { // Typedefs are derived from some other type. If we have a typedef of a // typedef, make sure to emit the whole chain. llvm::DIType Src = getOrCreateType(Ty->getDecl()->getUnderlyingType(), Unit); - if (!Src) - return llvm::DIType(); // We don't set size information, but do specify where the typedef was // declared. - unsigned Line = getLineNumber(Ty->getDecl()->getLocation()); + SourceLocation Loc = Ty->getDecl()->getLocation(); + llvm::DIFile File = getOrCreateFile(Loc); + unsigned Line = getLineNumber(Loc); const TypedefNameDecl *TyDecl = Ty->getDecl(); llvm::DIDescriptor TypedefContext = getContextDescriptor(cast<Decl>(Ty->getDecl()->getDeclContext())); return - DBuilder.createTypedef(Src, TyDecl->getName(), Unit, Line, TypedefContext); + DBuilder.createTypedef(Src, TyDecl->getName(), File, Line, TypedefContext); } llvm::DIType CGDebugInfo::CreateType(const FunctionType *Ty, @@ -751,15 +771,17 @@ llvm::DIType CGDebugInfo::CreateType(const FunctionType *Ty, SmallVector<llvm::Value *, 16> EltTys; // Add the result type at least. - EltTys.push_back(getOrCreateType(Ty->getResultType(), Unit)); + EltTys.push_back(getOrCreateType(Ty->getReturnType(), Unit)); // Set up remainder of arguments if there is a prototype. - // FIXME: IF NOT, HOW IS THIS REPRESENTED? llvm-gcc doesn't represent '...'! + // otherwise emit it as a variadic function. 
if (isa<FunctionNoProtoType>(Ty)) EltTys.push_back(DBuilder.createUnspecifiedParameter()); else if (const FunctionProtoType *FPT = dyn_cast<FunctionProtoType>(Ty)) { - for (unsigned i = 0, e = FPT->getNumArgs(); i != e; ++i) - EltTys.push_back(getOrCreateType(FPT->getArgType(i), Unit)); + for (unsigned i = 0, e = FPT->getNumParams(); i != e; ++i) + EltTys.push_back(getOrCreateType(FPT->getParamType(i), Unit)); + if (FPT->isVariadic()) + EltTys.push_back(DBuilder.createUnspecifiedParameter()); } llvm::DIArray EltTypeArray = DBuilder.getOrCreateArray(EltTys); @@ -784,7 +806,7 @@ llvm::DIType CGDebugInfo::createFieldType(StringRef name, uint64_t sizeInBits = 0; unsigned alignInBits = 0; if (!type->isIncompleteArrayType()) { - llvm::tie(sizeInBits, alignInBits) = CGM.getContext().getTypeInfo(type); + std::tie(sizeInBits, alignInBits) = CGM.getContext().getTypeInfo(type); if (sizeInBitsOverride) sizeInBits = sizeInBitsOverride; @@ -813,7 +835,7 @@ CollectRecordLambdaFields(const CXXRecordDecl *CXXDecl, unsigned fieldno = 0; for (CXXRecordDecl::capture_const_iterator I = CXXDecl->captures_begin(), E = CXXDecl->captures_end(); I != E; ++I, ++Field, ++fieldno) { - const LambdaExpr::Capture C = *I; + const LambdaCapture &C = *I; if (C.capturesVariable()) { VarDecl *V = C.getCapturedVar(); llvm::DIFile VUnit = getOrCreateFile(C.getLocation()); @@ -857,7 +879,7 @@ CGDebugInfo::CreateRecordStaticField(const VarDecl *Var, unsigned LineNumber = getLineNumber(Var->getLocation()); StringRef VName = Var->getName(); - llvm::Constant *C = NULL; + llvm::Constant *C = nullptr; if (Var->getInit()) { const APValue *Value = Var->evaluateValue(); if (Value) { @@ -926,9 +948,8 @@ void CGDebugInfo::CollectRecordFields(const RecordDecl *record, // Static and non-static members should appear in the same order as // the corresponding declarations in the source program. 
- for (RecordDecl::decl_iterator I = record->decls_begin(), - E = record->decls_end(); I != E; ++I) - if (const VarDecl *V = dyn_cast<VarDecl>(*I)) { + for (const auto *I : record->decls()) + if (const auto *V = dyn_cast<VarDecl>(I)) { // Reuse the existing static member declaration if one exists llvm::DenseMap<const Decl *, llvm::WeakVH>::iterator MI = StaticDataMemberCache.find(V->getCanonicalDecl()); @@ -939,7 +960,7 @@ void CGDebugInfo::CollectRecordFields(const RecordDecl *record, llvm::DIDerivedType(cast<llvm::MDNode>(MI->second))); } else elements.push_back(CreateRecordStaticField(V, RecordTy)); - } else if (FieldDecl *field = dyn_cast<FieldDecl>(*I)) { + } else if (const auto *field = dyn_cast<FieldDecl>(I)) { CollectRecordNormalField(field, layout.getFieldOffset(fieldNo), tunit, elements, RecordTy); @@ -1005,7 +1026,13 @@ llvm::DICompositeType CGDebugInfo::getOrCreateInstanceMethodType( llvm::DIArray EltTypeArray = DBuilder.getOrCreateArray(Elts); - return DBuilder.createSubroutineType(Unit, EltTypeArray); + unsigned Flags = 0; + if (Func->getExtProtoInfo().RefQualifier == RQ_LValue) + Flags |= llvm::DIDescriptor::FlagLValueReference; + if (Func->getExtProtoInfo().RefQualifier == RQ_RValue) + Flags |= llvm::DIDescriptor::FlagRValueReference; + + return DBuilder.createSubroutineType(Unit, EltTypeArray, Flags); } /// isFunctionLocalClass - Return true if CXXRecordDecl is defined @@ -1084,6 +1111,10 @@ CGDebugInfo::CreateCXXMemberFunction(const CXXMethodDecl *Method, } if (Method->hasPrototype()) Flags |= llvm::DIDescriptor::FlagPrototyped; + if (Method->getRefQualifier() == RQ_LValue) + Flags |= llvm::DIDescriptor::FlagLValueReference; + if (Method->getRefQualifier() == RQ_RValue) + Flags |= llvm::DIDescriptor::FlagRValueReference; llvm::DIArray TParamsArray = CollectFunctionTemplateParams(Method, Unit); llvm::DISubprogram SP = @@ -1092,7 +1123,7 @@ CGDebugInfo::CreateCXXMemberFunction(const CXXMethodDecl *Method, MethodTy, /*isLocalToUnit=*/false, /* isDefinition=*/ false, Virtuality, VIndex, ContainingType, - Flags, CGM.getLangOpts().Optimize, NULL, + Flags, CGM.getLangOpts().Optimize, nullptr, TParamsArray); SPCache[Method->getCanonicalDecl()] = llvm::WeakVH(SP); @@ -1111,9 +1142,8 @@ CollectCXXMemberFunctions(const CXXRecordDecl *RD, llvm::DIFile Unit, // Since we want more than just the individual member decls if we // have templated functions iterate over every declaration to gather // the functions. - for(DeclContext::decl_iterator I = RD->decls_begin(), - E = RD->decls_end(); I != E; ++I) { - if (const CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(*I)) { + for(const auto *I : RD->decls()) { + if (const auto *Method = dyn_cast<CXXMethodDecl>(I)) { // Reuse the existing member function declaration if it exists. // It may be associated with the declaration of the type & should be // reused as we're building the definition. @@ -1130,16 +1160,13 @@ CollectCXXMemberFunctions(const CXXRecordDecl *RD, llvm::DIFile Unit, EltTys.push_back(CreateCXXMemberFunction(Method, Unit, RecordTy)); } else EltTys.push_back(MI->second); - } else if (const FunctionTemplateDecl *FTD = - dyn_cast<FunctionTemplateDecl>(*I)) { + } else if (const auto *FTD = dyn_cast<FunctionTemplateDecl>(I)) { // Add any template specializations that have already been seen. Like // implicit member functions, these may have been added to a declaration // in the case of vtable-based debug info reduction. 
- for (FunctionTemplateDecl::spec_iterator SI = FTD->spec_begin(), - SE = FTD->spec_end(); - SI != SE; ++SI) { + for (const auto *SI : FTD->specializations()) { llvm::DenseMap<const FunctionDecl *, llvm::WeakVH>::iterator MI = - SPCache.find(cast<CXXMethodDecl>(*SI)->getCanonicalDecl()); + SPCache.find(cast<CXXMethodDecl>(SI)->getCanonicalDecl()); if (MI != SPCache.end()) EltTys.push_back(MI->second); } @@ -1156,15 +1183,14 @@ CollectCXXBases(const CXXRecordDecl *RD, llvm::DIFile Unit, llvm::DIType RecordTy) { const ASTRecordLayout &RL = CGM.getContext().getASTRecordLayout(RD); - for (CXXRecordDecl::base_class_const_iterator BI = RD->bases_begin(), - BE = RD->bases_end(); BI != BE; ++BI) { + for (const auto &BI : RD->bases()) { unsigned BFlags = 0; uint64_t BaseOffset; const CXXRecordDecl *Base = - cast<CXXRecordDecl>(BI->getType()->getAs<RecordType>()->getDecl()); + cast<CXXRecordDecl>(BI.getType()->getAs<RecordType>()->getDecl()); - if (BI->isVirtual()) { + if (BI.isVirtual()) { // virtual base offset offset is -ve. The code generator emits dwarf // expression where it expects +ve number. BaseOffset = @@ -1176,7 +1202,7 @@ CollectCXXBases(const CXXRecordDecl *RD, llvm::DIFile Unit, // FIXME: Inconsistent units for BaseOffset. It is in bytes when // BI->isVirtual() and bits when not. - AccessSpecifier Access = BI->getAccessSpecifier(); + AccessSpecifier Access = BI.getAccessSpecifier(); if (Access == clang::AS_private) BFlags |= llvm::DIDescriptor::FlagPrivate; else if (Access == clang::AS_protected) @@ -1184,7 +1210,7 @@ CollectCXXBases(const CXXRecordDecl *RD, llvm::DIFile Unit, llvm::DIType DTy = DBuilder.createInheritance(RecordTy, - getOrCreateType(BI->getType(), Unit), + getOrCreateType(BI.getType(), Unit), BaseOffset, BFlags); EltTys.push_back(DTy); } @@ -1225,7 +1251,7 @@ CollectTemplateParams(const TemplateParameterList *TPList, ->getTypeForDecl()) : CGM.getContext().getPointerType(D->getType()); llvm::DIType TTy = getOrCreateType(T, Unit); - llvm::Value *V = 0; + llvm::Value *V = nullptr; // Variable pointer template parameters have a value that is the address // of the variable. if (const VarDecl *VD = dyn_cast<VarDecl>(D)) @@ -1239,7 +1265,7 @@ CollectTemplateParams(const TemplateParameterList *TPList, V = CGM.GetAddrOfFunction(FD); // Member data pointers have special handling too to compute the fixed // offset within the object. - if (isa<FieldDecl>(D)) { + if (isa<FieldDecl>(D) || isa<IndirectFieldDecl>(D)) { // These five lines (& possibly the above member function pointer // handling) might be able to be refactored to use similar code in // CodeGenModule::getMemberPointerConstant @@ -1257,7 +1283,7 @@ CollectTemplateParams(const TemplateParameterList *TPList, case TemplateArgument::NullPtr: { QualType T = TA.getNullPtrType(); llvm::DIType TTy = getOrCreateType(T, Unit); - llvm::Value *V = 0; + llvm::Value *V = nullptr; // Special case member data pointer null values since they're actually -1 // instead of zero. 
if (const MemberPointerType *MPT = @@ -1287,7 +1313,7 @@ CollectTemplateParams(const TemplateParameterList *TPList, llvm::DITemplateValueParameter TVP = DBuilder.createTemplateParameterPack( TheCU, Name, llvm::DIType(), - CollectTemplateParams(NULL, TA.getPackAsArray(), Unit)); + CollectTemplateParams(nullptr, TA.getPackAsArray(), Unit)); TemplateParams.push_back(TVP); } break; case TemplateArgument::Expression: { @@ -1331,14 +1357,11 @@ CollectFunctionTemplateParams(const FunctionDecl *FD, llvm::DIFile Unit) { llvm::DIArray CGDebugInfo:: CollectCXXTemplateParams(const ClassTemplateSpecializationDecl *TSpecial, llvm::DIFile Unit) { - llvm::PointerUnion<ClassTemplateDecl *, - ClassTemplatePartialSpecializationDecl *> - PU = TSpecial->getSpecializedTemplateOrPartial(); - - TemplateParameterList *TPList = PU.is<ClassTemplateDecl *>() ? - PU.get<ClassTemplateDecl *>()->getTemplateParameters() : - PU.get<ClassTemplatePartialSpecializationDecl *>()->getTemplateParameters(); - const TemplateArgumentList &TAList = TSpecial->getTemplateInstantiationArgs(); + // Always get the full list of parameters, not just the ones from + // the specialization. + TemplateParameterList *TPList = + TSpecial->getSpecializedTemplate()->getTemplateParameters(); + const TemplateArgumentList &TAList = TSpecial->getTemplateArgs(); return CollectTemplateParams(TPList, TAList.asArray(), Unit); } @@ -1409,6 +1432,21 @@ llvm::DIType CGDebugInfo::getOrCreateInterfaceType(QualType D, return T; } +void CGDebugInfo::completeType(const EnumDecl *ED) { + if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + return; + QualType Ty = CGM.getContext().getEnumType(ED); + void* TyPtr = Ty.getAsOpaquePtr(); + auto I = TypeCache.find(TyPtr); + if (I == TypeCache.end() || + !llvm::DIType(cast<llvm::MDNode>(static_cast<llvm::Value *>(I->second))) + .isForwardDecl()) + return; + llvm::DIType Res = CreateTypeDefinition(Ty->castAs<EnumType>()); + assert(!Res.isForwardDecl()); + TypeCache[TyPtr] = Res; +} + void CGDebugInfo::completeType(const RecordDecl *RD) { if (DebugKind > CodeGenOptions::LimitedDebugInfo || !CGM.getLangOpts().CPlusPlus) @@ -1416,6 +1454,9 @@ void CGDebugInfo::completeType(const RecordDecl *RD) { } void CGDebugInfo::completeRequiredType(const RecordDecl *RD) { + if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + return; + if (const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) if (CXXDecl->isDynamicClass()) return; @@ -1431,40 +1472,67 @@ void CGDebugInfo::completeClassData(const RecordDecl *RD) { return; QualType Ty = CGM.getContext().getRecordType(RD); void* TyPtr = Ty.getAsOpaquePtr(); - if (CompletedTypeCache.count(TyPtr)) + auto I = TypeCache.find(TyPtr); + if (I != TypeCache.end() && + !llvm::DIType(cast<llvm::MDNode>(static_cast<llvm::Value *>(I->second))) + .isForwardDecl()) return; llvm::DIType Res = CreateTypeDefinition(Ty->castAs<RecordType>()); assert(!Res.isForwardDecl()); - CompletedTypeCache[TyPtr] = Res; TypeCache[TyPtr] = Res; } +static bool hasExplicitMemberDefinition(CXXRecordDecl::method_iterator I, + CXXRecordDecl::method_iterator End) { + for (; I != End; ++I) + if (FunctionDecl *Tmpl = I->getInstantiatedFromMemberFunction()) + if (!Tmpl->isImplicit() && Tmpl->isThisDeclarationADefinition() && + !I->getMemberSpecializationInfo()->isExplicitSpecialization()) + return true; + return false; +} + +static bool shouldOmitDefinition(CodeGenOptions::DebugInfoKind DebugKind, + const RecordDecl *RD, + const LangOptions &LangOpts) { + if (DebugKind > CodeGenOptions::LimitedDebugInfo) + return 
false; + + if (!LangOpts.CPlusPlus) + return false; + + if (!RD->isCompleteDefinitionRequired()) + return true; + + const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD); + + if (!CXXDecl) + return false; + + if (CXXDecl->hasDefinition() && CXXDecl->isDynamicClass()) + return true; + + TemplateSpecializationKind Spec = TSK_Undeclared; + if (const ClassTemplateSpecializationDecl *SD = + dyn_cast<ClassTemplateSpecializationDecl>(RD)) + Spec = SD->getSpecializationKind(); + + if (Spec == TSK_ExplicitInstantiationDeclaration && + hasExplicitMemberDefinition(CXXDecl->method_begin(), + CXXDecl->method_end())) + return true; + + return false; +} + /// CreateType - get structure or union type. llvm::DIType CGDebugInfo::CreateType(const RecordType *Ty) { RecordDecl *RD = Ty->getDecl(); - const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD); - // Always emit declarations for types that aren't required to be complete when - // in limit-debug-info mode. If the type is later found to be required to be - // complete this declaration will be upgraded to a definition by - // `completeRequiredType`. - // If the type is dynamic, only emit the definition in TUs that require class - // data. This is handled by `completeClassData`. llvm::DICompositeType T(getTypeOrNull(QualType(Ty, 0))); - // If we've already emitted the type, just use that, even if it's only a - // declaration. The completeType, completeRequiredType, and completeClassData - // callbacks will handle promoting the declaration to a definition. - if (T || - (DebugKind <= CodeGenOptions::LimitedDebugInfo && - // Under -flimit-debug-info, emit only a declaration unless the type is - // required to be complete. - !RD->isCompleteDefinitionRequired() && CGM.getLangOpts().CPlusPlus) || - // If the class is dynamic, only emit a declaration. A definition will be - // emitted whenever the vtable is emitted. - (CXXDecl && CXXDecl->hasDefinition() && CXXDecl->isDynamicClass()) || T) { - llvm::DIDescriptor FDContext = - getContextDescriptor(cast<Decl>(RD->getDeclContext())); + if (T || shouldOmitDefinition(DebugKind, RD, CGM.getLangOpts())) { if (!T) - T = getOrCreateRecordFwdDecl(Ty, FDContext); + T = getOrCreateRecordFwdDecl( + Ty, getContextDescriptor(cast<Decl>(RD->getDeclContext()))); return T; } @@ -1498,9 +1566,6 @@ llvm::DIType CGDebugInfo::CreateTypeDefinition(const RecordType *Ty) { LexicalBlockStack.push_back(&*FwdDecl); RegionMap[Ty->getDecl()] = llvm::WeakVH(FwdDecl); - // Add this to the completed-type cache while we're completing it recursively. - CompletedTypeCache[QualType(Ty, 0).getAsOpaquePtr()] = FwdDecl; - // Convert all the elements. SmallVector<llvm::Value *, 16> EltTys; // what about nested types? @@ -1572,20 +1637,28 @@ llvm::DIType CGDebugInfo::CreateType(const ObjCInterfaceType *Ty, // Get overall information about the record type for the debug info. llvm::DIFile DefUnit = getOrCreateFile(ID->getLocation()); unsigned Line = getLineNumber(ID->getLocation()); - unsigned RuntimeLang = TheCU.getLanguage(); + llvm::dwarf::SourceLanguage RuntimeLang = TheCU.getLanguage(); // If this is just a forward declaration return a special forward-declaration // debug type since we won't be able to lay out the entire type. 
ObjCInterfaceDecl *Def = ID->getDefinition(); - if (!Def) { - llvm::DIType FwdDecl = - DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, - ID->getName(), TheCU, DefUnit, Line, - RuntimeLang); + if (!Def || !Def->getImplementation()) { + llvm::DIType FwdDecl = DBuilder.createReplaceableForwardDecl( + llvm::dwarf::DW_TAG_structure_type, ID->getName(), TheCU, DefUnit, Line, + RuntimeLang); + ObjCInterfaceCache.push_back(ObjCInterfaceCacheEntry(Ty, FwdDecl, Unit)); return FwdDecl; } - ID = Def; + + return CreateTypeDefinition(Ty, Unit); +} + +llvm::DIType CGDebugInfo::CreateTypeDefinition(const ObjCInterfaceType *Ty, llvm::DIFile Unit) { + ObjCInterfaceDecl *ID = Ty->getDecl(); + llvm::DIFile DefUnit = getOrCreateFile(ID->getLocation()); + unsigned Line = getLineNumber(ID->getLocation()); + unsigned RuntimeLang = TheCU.getLanguage(); // Bit size, align and offset of the type. uint64_t Size = CGM.getContext().getTypeSize(Ty); @@ -1600,10 +1673,8 @@ llvm::DIType CGDebugInfo::CreateType(const ObjCInterfaceType *Ty, Line, Size, Align, Flags, llvm::DIType(), llvm::DIArray(), RuntimeLang); - // Otherwise, insert it into the CompletedTypeCache so that recursive uses - // will find it and we're emitting the complete type. - QualType QualTy = QualType(Ty, 0); - CompletedTypeCache[QualTy.getAsOpaquePtr()] = RealDecl; + QualType QTy(Ty, 0); + TypeCache[QTy.getAsOpaquePtr()] = RealDecl; // Push the struct on region stack. LexicalBlockStack.push_back(static_cast<llvm::MDNode*>(RealDecl)); @@ -1625,9 +1696,7 @@ llvm::DIType CGDebugInfo::CreateType(const ObjCInterfaceType *Ty, } // Create entries for all of the properties. - for (ObjCContainerDecl::prop_iterator I = ID->prop_begin(), - E = ID->prop_end(); I != E; ++I) { - const ObjCPropertyDecl *PD = *I; + for (const auto *PD : ID->properties()) { SourceLocation Loc = PD->getLocation(); llvm::DIFile PUnit = getOrCreateFile(Loc); unsigned PLine = getLineNumber(Loc); @@ -1697,7 +1766,7 @@ llvm::DIType CGDebugInfo::CreateType(const ObjCInterfaceType *Ty, else if (Field->getAccessControl() == ObjCIvarDecl::Private) Flags = llvm::DIDescriptor::FlagPrivate; - llvm::MDNode *PropertyNode = NULL; + llvm::MDNode *PropertyNode = nullptr; if (ObjCImplementationDecl *ImpD = ID->getImplementation()) { if (ObjCPropertyImplDecl *PImpD = ImpD->FindPropertyImplIvarDecl(Field->getIdentifier())) { @@ -1729,12 +1798,6 @@ llvm::DIType CGDebugInfo::CreateType(const ObjCInterfaceType *Ty, llvm::DIArray Elements = DBuilder.getOrCreateArray(EltTys); RealDecl.setTypeArray(Elements); - // If the implementation is not yet set, we do not want to mark it - // as complete. An implementation may declare additional - // private ivars that we would miss otherwise. 
- if (ID->getImplementation() == 0) - CompletedTypeCache.erase(QualTy.getAsOpaquePtr()); - LexicalBlockStack.pop_back(); return RealDecl; } @@ -1829,11 +1892,13 @@ llvm::DIType CGDebugInfo::CreateType(const MemberPointerType *Ty, if (!Ty->getPointeeType()->isFunctionType()) return DBuilder.createMemberPointerType( getOrCreateType(Ty->getPointeeType(), U), ClassType); + + const FunctionProtoType *FPT = + Ty->getPointeeType()->getAs<FunctionProtoType>(); return DBuilder.createMemberPointerType(getOrCreateInstanceMethodType( - CGM.getContext().getPointerType( - QualType(Ty->getClass(), Ty->getPointeeType().getCVRQualifiers())), - Ty->getPointeeType()->getAs<FunctionProtoType>(), U), - ClassType); + CGM.getContext().getPointerType(QualType(Ty->getClass(), + FPT->getTypeQuals())), + FPT, U), ClassType); } llvm::DIType CGDebugInfo::CreateType(const AtomicType *Ty, @@ -1863,17 +1928,31 @@ llvm::DIType CGDebugInfo::CreateEnumType(const EnumType *Ty) { llvm::DIFile DefUnit = getOrCreateFile(ED->getLocation()); unsigned Line = getLineNumber(ED->getLocation()); StringRef EDName = ED->getName(); - return DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_enumeration_type, - EDName, EDContext, DefUnit, Line, 0, - Size, Align, FullName); + llvm::DIType RetTy = DBuilder.createReplaceableForwardDecl( + llvm::dwarf::DW_TAG_enumeration_type, EDName, EDContext, DefUnit, Line, + 0, Size, Align, FullName); + ReplaceMap.push_back(std::make_pair(Ty, static_cast<llvm::Value *>(RetTy))); + return RetTy; + } + + return CreateTypeDefinition(Ty); +} + +llvm::DIType CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) { + const EnumDecl *ED = Ty->getDecl(); + uint64_t Size = 0; + uint64_t Align = 0; + if (!ED->getTypeForDecl()->isIncompleteType()) { + Size = CGM.getContext().getTypeSize(ED->getTypeForDecl()); + Align = CGM.getContext().getTypeAlign(ED->getTypeForDecl()); } + SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU); + // Create DIEnumerator elements for each enumerator. SmallVector<llvm::Value *, 16> Enumerators; ED = ED->getDefinition(); - for (EnumDecl::enumerator_iterator - Enum = ED->enumerator_begin(), EnumEnd = ED->enumerator_end(); - Enum != EnumEnd; ++Enum) { + for (const auto *Enum : ED->enumerators()) { Enumerators.push_back( DBuilder.createEnumerator(Enum->getName(), Enum->getInitVal().getSExtValue())); @@ -1907,9 +1986,12 @@ static QualType UnwrapTypeForDebugInfo(QualType T, const ASTContext &C) { switch (T->getTypeClass()) { default: return C.getQualifiedType(T.getTypePtr(), Quals); - case Type::TemplateSpecialization: - T = cast<TemplateSpecializationType>(T)->desugar(); - break; + case Type::TemplateSpecialization: { + const auto *Spec = cast<TemplateSpecializationType>(T); + if (Spec->isTypeAlias()) + return C.getQualifiedType(T.getTypePtr(), Quals); + T = Spec->desugar(); + break; } case Type::TypeOfExpr: T = cast<TypeOfExprType>(T)->getUnderlyingExpr()->getType(); break; @@ -1954,16 +2036,7 @@ llvm::DIType CGDebugInfo::getTypeOrNull(QualType Ty) { // Unwrap the type as needed for debug information. Ty = UnwrapTypeForDebugInfo(Ty, CGM.getContext()); - // Check for existing entry. 
- if (Ty->getTypeClass() == Type::ObjCInterface) { - llvm::Value *V = getCachedInterfaceTypeOrNull(Ty); - if (V) - return llvm::DIType(cast<llvm::MDNode>(V)); - else return llvm::DIType(); - } - - llvm::DenseMap<void *, llvm::WeakVH>::iterator it = - TypeCache.find(Ty.getAsOpaquePtr()); + auto it = TypeCache.find(Ty.getAsOpaquePtr()); if (it != TypeCache.end()) { // Verify that the debug info still exists. if (llvm::Value *V = it->second) @@ -1973,41 +2046,15 @@ llvm::DIType CGDebugInfo::getTypeOrNull(QualType Ty) { return llvm::DIType(); } -/// getCompletedTypeOrNull - Get the type from the cache or return null if it -/// doesn't exist. -llvm::DIType CGDebugInfo::getCompletedTypeOrNull(QualType Ty) { - - // Unwrap the type as needed for debug information. - Ty = UnwrapTypeForDebugInfo(Ty, CGM.getContext()); - - // Check for existing entry. - llvm::Value *V = 0; - llvm::DenseMap<void *, llvm::WeakVH>::iterator it = - CompletedTypeCache.find(Ty.getAsOpaquePtr()); - if (it != CompletedTypeCache.end()) - V = it->second; - else { - V = getCachedInterfaceTypeOrNull(Ty); - } - - // Verify that any cached debug info still exists. - return llvm::DIType(cast_or_null<llvm::MDNode>(V)); -} - -/// getCachedInterfaceTypeOrNull - Get the type from the interface -/// cache, unless it needs to regenerated. Otherwise return null. -llvm::Value *CGDebugInfo::getCachedInterfaceTypeOrNull(QualType Ty) { - // Is there a cached interface that hasn't changed? - llvm::DenseMap<void *, std::pair<llvm::WeakVH, unsigned > > - ::iterator it1 = ObjCInterfaceCache.find(Ty.getAsOpaquePtr()); - - if (it1 != ObjCInterfaceCache.end()) - if (ObjCInterfaceDecl* Decl = getObjCInterfaceDecl(Ty)) - if (Checksum(Decl) == it1->second.second) - // Return cached forward declaration. - return it1->second.first; +void CGDebugInfo::completeTemplateDefinition( + const ClassTemplateSpecializationDecl &SD) { + if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + return; - return 0; + completeClassData(&SD); + // In case this type has no member function definitions being emitted, ensure + // it is retained + RetainedTypes.push_back(CGM.getContext().getRecordType(&SD).getAsOpaquePtr()); } /// getOrCreateType - Get the type from the cache or create a new @@ -2019,7 +2066,7 @@ llvm::DIType CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile Unit) { // Unwrap the type as needed for debug information. Ty = UnwrapTypeForDebugInfo(Ty, CGM.getContext()); - if (llvm::DIType T = getCompletedTypeOrNull(Ty)) + if (llvm::DIType T = getTypeOrNull(Ty)) return T; // Otherwise create the type. @@ -2029,39 +2076,6 @@ llvm::DIType CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile Unit) { // And update the type cache. TypeCache[TyPtr] = Res; - // FIXME: this getTypeOrNull call seems silly when we just inserted the type - // into the cache - but getTypeOrNull has a special case for cached interface - // types. We should probably just pull that out as a special case for the - // "else" block below & skip the otherwise needless lookup. - llvm::DIType TC = getTypeOrNull(Ty); - if (TC && TC.isForwardDecl()) - ReplaceMap.push_back(std::make_pair(TyPtr, static_cast<llvm::Value*>(TC))); - else if (ObjCInterfaceDecl* Decl = getObjCInterfaceDecl(Ty)) { - // Interface types may have elements added to them by a - // subsequent implementation or extension, so we keep them in - // the ObjCInterfaceCache together with a checksum. 
Instead of - // the (possibly) incomplete interface type, we return a forward - // declaration that gets RAUW'd in CGDebugInfo::finalize(). - std::pair<llvm::WeakVH, unsigned> &V = ObjCInterfaceCache[TyPtr]; - if (V.first) - return llvm::DIType(cast<llvm::MDNode>(V.first)); - TC = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, - Decl->getName(), TheCU, Unit, - getLineNumber(Decl->getLocation()), - TheCU.getLanguage()); - // Store the forward declaration in the cache. - V.first = TC; - V.second = Checksum(Decl); - - // Register the type for replacement in finalize(). - ReplaceMap.push_back(std::make_pair(TyPtr, static_cast<llvm::Value*>(TC))); - - return TC; - } - - if (!Res.isForwardDecl()) - CompletedTypeCache[TyPtr] = Res; - return Res; } @@ -2073,7 +2087,7 @@ unsigned CGDebugInfo::Checksum(const ObjCInterfaceDecl *ID) { // a checksum. unsigned Sum = 0; for (const ObjCIvarDecl *Ivar = ID->all_declared_ivar_begin(); - Ivar != 0; Ivar = Ivar->getNextIvar()) + Ivar != nullptr; Ivar = Ivar->getNextIvar()) ++Sum; return Sum; @@ -2087,7 +2101,7 @@ ObjCInterfaceDecl *CGDebugInfo::getObjCInterfaceDecl(QualType Ty) { case Type::ObjCInterface: return cast<ObjCInterfaceType>(Ty)->getDecl(); default: - return 0; + return nullptr; } } @@ -2097,7 +2111,7 @@ llvm::DIType CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile Unit) { if (Ty.hasLocalQualifiers()) return CreateQualifiedType(Ty, Unit); - const char *Diag = 0; + const char *Diag = nullptr; // Work out details of type. switch (Ty->getTypeClass()) { @@ -2123,10 +2137,11 @@ llvm::DIType CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile Unit) { return CreateType(cast<ComplexType>(Ty)); case Type::Pointer: return CreateType(cast<PointerType>(Ty), Unit); + case Type::Adjusted: case Type::Decayed: - // Decayed types are just pointers in LLVM and DWARF. + // Decayed and adjusted types use the adjusted type in LLVM and DWARF. return CreateType( - cast<PointerType>(cast<DecayedType>(Ty)->getDecayedType()), Unit); + cast<PointerType>(cast<AdjustedType>(Ty)->getAdjustedType()), Unit); case Type::BlockPointer: return CreateType(cast<BlockPointerType>(Ty), Unit); case Type::Typedef: @@ -2154,8 +2169,10 @@ llvm::DIType CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile Unit) { case Type::Atomic: return CreateType(cast<AtomicType>(Ty), Unit); - case Type::Attributed: case Type::TemplateSpecialization: + return CreateType(cast<TemplateSpecializationType>(Ty), Unit); + + case Type::Attributed: case Type::Elaborated: case Type::Paren: case Type::SubstTemplateTypeParm: @@ -2199,10 +2216,6 @@ llvm::DIType CGDebugInfo::getOrCreateLimitedType(const RecordType *Ty, // correct order if the full type is needed. Res.setTypeArray(T.getTypeArray()); - if (T && T.isForwardDecl()) - ReplaceMap.push_back( - std::make_pair(QTy.getAsOpaquePtr(), static_cast<llvm::Value *>(T))); - // And update the type cache. TypeCache[QTy.getAsOpaquePtr()] = Res; return Res; @@ -2222,20 +2235,14 @@ llvm::DICompositeType CGDebugInfo::CreateLimitedType(const RecordType *Ty) { // If we ended up creating the type during the context chain construction, // just return that. 
- // FIXME: this could be dealt with better if the type was recorded as - // completed before we started this (see the CompletedTypeCache usage in - // CGDebugInfo::CreateTypeDefinition(const RecordType*) - that would need to - // be pushed to before context creation, but after it was known to be - // destined for completion (might still have an issue if this caller only - // required a declaration but the context construction ended up creating a - // definition) llvm::DICompositeType T(getTypeOrNull(CGM.getContext().getRecordType(RD))); if (T && (!T.isForwardDecl() || !RD->getDefinition())) return T; - // If this is just a forward declaration, construct an appropriately - // marked node and just return it. - if (!RD->getDefinition()) + // If this is just a forward or incomplete declaration, construct an + // appropriately marked node and just return it. + const RecordDecl *D = RD->getDefinition(); + if (!D || !D->isCompleteDefinition()) return getOrCreateRecordFwdDecl(Ty, RDContext); uint64_t Size = CGM.getContext().getTypeSize(Ty); @@ -2277,7 +2284,7 @@ void CGDebugInfo::CollectContainingType(const CXXRecordDecl *RD, llvm::DICompositeType ContainingType; const ASTRecordLayout &RL = CGM.getContext().getASTRecordLayout(RD); if (const CXXRecordDecl *PBase = RL.getPrimaryBase()) { - // Seek non virtual primary base root. + // Seek non-virtual primary base root. while (1) { const ASTRecordLayout &BRL = CGM.getContext().getASTRecordLayout(PBase); const CXXRecordDecl *PBT = BRL.getPrimaryBase(); @@ -2309,7 +2316,7 @@ llvm::DIType CGDebugInfo::CreateMemberType(llvm::DIFile Unit, QualType FType, return Ty; } -llvm::DIDescriptor CGDebugInfo::getDeclarationOrDefinition(const Decl *D) { +llvm::DIScope CGDebugInfo::getDeclarationOrDefinition(const Decl *D) { // We only need a declaration (not a definition) of the type - so use whatever // we would otherwise do to get a type for a pointee. (forward declarations in // limited debug info, full definitions (if the type definition is available) @@ -2327,15 +2334,15 @@ llvm::DIDescriptor CGDebugInfo::getDeclarationOrDefinition(const Decl *D) { llvm::DenseMap<const Decl *, llvm::WeakVH>::iterator I = DeclCache.find(D->getCanonicalDecl()); if (I == DeclCache.end()) - return llvm::DIDescriptor(); + return llvm::DIScope(); llvm::Value *V = I->second; - return llvm::DIDescriptor(dyn_cast_or_null<llvm::MDNode>(V)); + return llvm::DIScope(dyn_cast_or_null<llvm::MDNode>(V)); } /// getFunctionDeclaration - Return debug info descriptor to describe method /// declaration for the given method definition. 
llvm::DISubprogram CGDebugInfo::getFunctionDeclaration(const Decl *D) { - if (!D || DebugKind == CodeGenOptions::DebugLineTablesOnly) + if (!D || DebugKind <= CodeGenOptions::DebugLineTablesOnly) return llvm::DISubprogram(); const FunctionDecl *FD = dyn_cast<FunctionDecl>(D); @@ -2352,7 +2359,6 @@ llvm::DISubprogram CGDebugInfo::getFunctionDeclaration(const Decl *D) { llvm::DICompositeType T(S); llvm::DISubprogram SP = CreateCXXMemberFunction(MD, getOrCreateFile(MD->getLocation()), T); - T.addMember(SP); return SP; } } @@ -2363,9 +2369,7 @@ llvm::DISubprogram CGDebugInfo::getFunctionDeclaration(const Decl *D) { return SP; } - for (FunctionDecl::redecl_iterator I = FD->redecls_begin(), - E = FD->redecls_end(); I != E; ++I) { - const FunctionDecl *NextFD = *I; + for (auto NextFD : FD->redecls()) { llvm::DenseMap<const FunctionDecl *, llvm::WeakVH>::iterator MI = SPCache.find(NextFD->getCanonicalDecl()); if (MI != SPCache.end()) { @@ -2383,7 +2387,7 @@ llvm::DISubprogram CGDebugInfo::getFunctionDeclaration(const Decl *D) { llvm::DICompositeType CGDebugInfo::getOrCreateFunctionType(const Decl *D, QualType FnType, llvm::DIFile F) { - if (!D || DebugKind == CodeGenOptions::DebugLineTablesOnly) + if (!D || DebugKind <= CodeGenOptions::DebugLineTablesOnly) // Create fake but valid subroutine type. Otherwise // llvm::DISubprogram::Verify() would return false, and // subprogram DIE will miss DW_AT_decl_file and @@ -2397,7 +2401,7 @@ llvm::DICompositeType CGDebugInfo::getOrCreateFunctionType(const Decl *D, SmallVector<llvm::Value *, 16> Elts; // First element is always return type. For 'void' functions it is NULL. - QualType ResultTy = OMethod->getResultType(); + QualType ResultTy = OMethod->getReturnType(); // Replace the instancetype keyword with the actual type. if (ResultTy == CGM.getContext().getObjCInstanceType()) @@ -2413,18 +2417,35 @@ llvm::DICompositeType CGDebugInfo::getOrCreateFunctionType(const Decl *D, llvm::DIType CmdTy = getOrCreateType(OMethod->getCmdDecl()->getType(), F); Elts.push_back(DBuilder.createArtificialType(CmdTy)); // Get rest of the arguments. - for (ObjCMethodDecl::param_const_iterator PI = OMethod->param_begin(), - PE = OMethod->param_end(); PI != PE; ++PI) - Elts.push_back(getOrCreateType((*PI)->getType(), F)); + for (const auto *PI : OMethod->params()) + Elts.push_back(getOrCreateType(PI->getType(), F)); llvm::DIArray EltTypeArray = DBuilder.getOrCreateArray(Elts); return DBuilder.createSubroutineType(F, EltTypeArray); } + + // Handle variadic function types; they need an additional + // unspecified parameter. + if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) + if (FD->isVariadic()) { + SmallVector<llvm::Value *, 16> EltTys; + EltTys.push_back(getOrCreateType(FD->getReturnType(), F)); + if (const FunctionProtoType *FPT = dyn_cast<FunctionProtoType>(FnType)) + for (unsigned i = 0, e = FPT->getNumParams(); i != e; ++i) + EltTys.push_back(getOrCreateType(FPT->getParamType(i), F)); + EltTys.push_back(DBuilder.createUnspecifiedParameter()); + llvm::DIArray EltTypeArray = DBuilder.getOrCreateArray(EltTys); + return DBuilder.createSubroutineType(F, EltTypeArray); + } + return llvm::DICompositeType(getOrCreateType(FnType, F)); } /// EmitFunctionStart - Constructs the debug code for entering a function. 
-void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, QualType FnType, +void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, + SourceLocation Loc, + SourceLocation ScopeLoc, + QualType FnType, llvm::Function *Fn, CGBuilderTy &Builder) { @@ -2434,13 +2455,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, QualType FnType, FnBeginRegionCount.push_back(LexicalBlockStack.size()); const Decl *D = GD.getDecl(); - // Function may lack declaration in source code if it is created by Clang - // CodeGen (examples: _GLOBAL__I_a, __cxx_global_array_dtor, thunk). - bool HasDecl = (D != 0); - // Use the location of the declaration. - SourceLocation Loc; - if (HasDecl) - Loc = D->getLocation(); + bool HasDecl = (D != nullptr); unsigned Flags = 0; llvm::DIFile Unit = getOrCreateFile(Loc); @@ -2500,23 +2515,34 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, QualType FnType, if (!Name.empty() && Name[0] == '\01') Name = Name.substr(1); - unsigned LineNo = getLineNumber(Loc); - if (!HasDecl || D->isImplicit()) + if (!HasDecl || D->isImplicit()) { Flags |= llvm::DIDescriptor::FlagArtificial; + // Artificial functions without a location should not silently reuse CurLoc. + if (Loc.isInvalid()) + CurLoc = SourceLocation(); + } + unsigned LineNo = getLineNumber(Loc); + unsigned ScopeLine = getLineNumber(ScopeLoc); + // FIXME: The function declaration we're constructing here is mostly reusing + // declarations from CXXMethodDecl and not constructing new ones for arbitrary + // FunctionDecls. When/if we fix this we can have FDContext be TheCU/null for + // all subprograms instead of the actual context since subprogram definitions + // are emitted as CU level entities by the backend. llvm::DISubprogram SP = DBuilder.createFunction(FDContext, Name, LinkageName, Unit, LineNo, getOrCreateFunctionType(D, FnType, Unit), Fn->hasInternalLinkage(), true /*definition*/, - getLineNumber(CurLoc), Flags, + ScopeLine, Flags, CGM.getLangOpts().Optimize, Fn, TParamsArray, getFunctionDeclaration(D)); if (HasDecl) DeclCache.insert(std::make_pair(D->getCanonicalDecl(), llvm::WeakVH(SP))); - // Push function on region stack. + // Push the function onto the lexical block stack. llvm::MDNode *SPN = SP; LexicalBlockStack.push_back(SPN); + if (HasDecl) RegionMap[D] = llvm::WeakVH(SP); } @@ -2554,13 +2580,11 @@ void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc, /// CreateLexicalBlock - Creates a new lexical block node and pushes it on /// the stack. void CGDebugInfo::CreateLexicalBlock(SourceLocation Loc) { - llvm::DIDescriptor D = - DBuilder.createLexicalBlock(LexicalBlockStack.empty() ? - llvm::DIDescriptor() : - llvm::DIDescriptor(LexicalBlockStack.back()), - getOrCreateFile(CurLoc), - getLineNumber(CurLoc), - getColumnNumber(CurLoc)); + llvm::DIDescriptor D = DBuilder.createLexicalBlock( + llvm::DIDescriptor(LexicalBlockStack.empty() ? nullptr + : LexicalBlockStack.back()), + getOrCreateFile(CurLoc), getLineNumber(CurLoc), getColumnNumber(CurLoc), + 0); llvm::MDNode *DN = D; LexicalBlockStack.push_back(DN); } @@ -2664,7 +2688,7 @@ llvm::DIType CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, } FType = Type; - llvm::DIType FieldTy = CGDebugInfo::getOrCreateType(FType, Unit); + llvm::DIType FieldTy = getOrCreateType(FType, Unit); FieldSize = CGM.getContext().getTypeSize(FType); FieldAlign = CGM.getContext().toBits(Align); @@ -2684,7 +2708,7 @@ llvm::DIType CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, } /// EmitDeclare - Emit local variable declaration debug info. 
-void CGDebugInfo::EmitDeclare(const VarDecl *VD, unsigned Tag, +void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::dwarf::LLVMConstants Tag, llvm::Value *Storage, unsigned ArgNo, CGBuilderTy &Builder) { assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); @@ -2768,10 +2792,7 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, unsigned Tag, // all union fields. const RecordDecl *RD = cast<RecordDecl>(RT->getDecl()); if (RD->isUnion() && RD->isAnonymousStructOrUnion()) { - for (RecordDecl::field_iterator I = RD->field_begin(), - E = RD->field_end(); - I != E; ++I) { - FieldDecl *Field = *I; + for (const auto *Field : RD->fields()) { llvm::DIType FieldTy = getOrCreateType(Field->getType(), Unit); StringRef FieldName = Field->getName(); @@ -2824,7 +2845,6 @@ llvm::DIType CGDebugInfo::CreateSelfType(const QualType &QualTy, llvm::DIType Ty) { llvm::DIType CachedTy = getTypeOrNull(QualTy); if (CachedTy) Ty = CachedTy; - else DEBUG(llvm::dbgs() << "No cached type for self."); return DBuilder.createObjectPointerType(Ty); } @@ -2835,7 +2855,7 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable(const VarDecl *VD, assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!"); - if (Builder.GetInsertBlock() == 0) + if (Builder.GetInsertBlock() == nullptr) return; bool isByRef = VD->hasAttr<BlocksAttr>(); @@ -2965,15 +2985,12 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, BlockLayoutChunk chunk; chunk.OffsetInBits = blockLayout->getElementOffsetInBits(block.CXXThisIndex); - chunk.Capture = 0; + chunk.Capture = nullptr; chunks.push_back(chunk); } // Variable captures. - for (BlockDecl::capture_const_iterator - i = blockDecl->capture_begin(), e = blockDecl->capture_end(); - i != e; ++i) { - const BlockDecl::Capture &capture = *i; + for (const auto &capture : blockDecl->captures()) { const VarDecl *variable = capture.getVariable(); const CGBlockInfo::Capture &captureInfo = block.getCapture(variable); @@ -3085,10 +3102,40 @@ CGDebugInfo::getOrCreateStaticDataMemberDeclarationOrNull(const VarDecl *D) { llvm::DICompositeType Ctxt( getContextDescriptor(cast<Decl>(D->getDeclContext()))); llvm::DIDerivedType T = CreateRecordStaticField(D, Ctxt); - Ctxt.addMember(T); return T; } +/// Recursively collect all of the member fields of a global anonymous decl and +/// create static variables for them. The first time this is called it needs +/// to be on a union and then from there we can have additional unnamed fields. +llvm::DIGlobalVariable +CGDebugInfo::CollectAnonRecordDecls(const RecordDecl *RD, llvm::DIFile Unit, + unsigned LineNo, StringRef LinkageName, + llvm::GlobalVariable *Var, + llvm::DIDescriptor DContext) { + llvm::DIGlobalVariable GV; + + for (const auto *Field : RD->fields()) { + llvm::DIType FieldTy = getOrCreateType(Field->getType(), Unit); + StringRef FieldName = Field->getName(); + + // Ignore unnamed fields, but recurse into anonymous records. + if (FieldName.empty()) { + const RecordType *RT = dyn_cast<RecordType>(Field->getType()); + if (RT) + GV = CollectAnonRecordDecls(RT->getDecl(), Unit, LineNo, LinkageName, + Var, DContext); + continue; + } + // Use VarDecl's Tag, Scope and Line number. + GV = DBuilder.createStaticVariable(DContext, FieldName, LinkageName, Unit, + LineNo, FieldTy, + Var->hasInternalLinkage(), Var, + llvm::DIDerivedType()); + } + return GV; +} + /// EmitGlobalVariable - Emit information about a global variable. 
void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, const VarDecl *D) { @@ -3109,46 +3156,36 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, T = CGM.getContext().getConstantArrayType(ET, ConstVal, ArrayType::Normal, 0); } + StringRef DeclName = D->getName(); StringRef LinkageName; - if (D->getDeclContext() && !isa<FunctionDecl>(D->getDeclContext()) - && !isa<ObjCMethodDecl>(D->getDeclContext())) + if (D->getDeclContext() && !isa<FunctionDecl>(D->getDeclContext()) && + !isa<ObjCMethodDecl>(D->getDeclContext())) LinkageName = Var->getName(); if (LinkageName == DeclName) LinkageName = StringRef(); + llvm::DIDescriptor DContext = getContextDescriptor(dyn_cast<Decl>(D->getDeclContext())); - llvm::DIGlobalVariable GV = DBuilder.createStaticVariable( - DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit), - Var->hasInternalLinkage(), Var, - getOrCreateStaticDataMemberDeclarationOrNull(D)); - DeclCache.insert(std::make_pair(D->getCanonicalDecl(), llvm::WeakVH(GV))); -} -/// EmitGlobalVariable - Emit information about an objective-c interface. -void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, - ObjCInterfaceDecl *ID) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); - // Create global variable debug descriptor. - llvm::DIFile Unit = getOrCreateFile(ID->getLocation()); - unsigned LineNo = getLineNumber(ID->getLocation()); - - StringRef Name = ID->getName(); - - QualType T = CGM.getContext().getObjCInterfaceType(ID); - if (T->isIncompleteArrayType()) { - - // CodeGen turns int[] into int[1] so we'll do the same here. - llvm::APInt ConstVal(32, 1); - QualType ET = CGM.getContext().getAsArrayType(T)->getElementType(); - - T = CGM.getContext().getConstantArrayType(ET, ConstVal, - ArrayType::Normal, 0); + // Attempt to store one global variable for the declaration - even if we + // emit a lot of fields. + llvm::DIGlobalVariable GV; + + // If this is an anonymous union then we'll want to emit a global + // variable for each member of the anonymous union so that it's possible + // to find the name of any field in the union. + if (T->isUnionType() && DeclName.empty()) { + const RecordDecl *RD = cast<RecordType>(T)->getDecl(); + assert(RD->isAnonymousStructOrUnion() && "unnamed non-anonymous struct or union?"); + GV = CollectAnonRecordDecls(RD, Unit, LineNo, LinkageName, Var, DContext); + } else { + GV = DBuilder.createStaticVariable( + DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit), + Var->hasInternalLinkage(), Var, + getOrCreateStaticDataMemberDeclarationOrNull(D)); } - - DBuilder.createGlobalVariable(Name, Unit, LineNo, - getOrCreateType(T, Unit), - Var->hasInternalLinkage(), Var); + DeclCache.insert(std::make_pair(D->getCanonicalDecl(), llvm::WeakVH(GV))); } /// EmitGlobalVariable - Emit global variable's debug info. @@ -3167,10 +3204,20 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, // Do not use DIGlobalVariable for enums. if (Ty.getTag() == llvm::dwarf::DW_TAG_enumeration_type) return; + // Do not emit separate definitions for function local const/statics. 
+ if (isa<FunctionDecl>(VD->getDeclContext())) + return; + VD = cast<ValueDecl>(VD->getCanonicalDecl()); + auto pair = DeclCache.insert(std::make_pair(VD, llvm::WeakVH())); + if (!pair.second) + return; + llvm::DIDescriptor DContext = + getContextDescriptor(dyn_cast<Decl>(VD->getDeclContext())); llvm::DIGlobalVariable GV = DBuilder.createStaticVariable( - Unit, Name, Name, Unit, getLineNumber(VD->getLocation()), Ty, true, Init, + DContext, Name, StringRef(), Unit, getLineNumber(VD->getLocation()), Ty, + true, Init, getOrCreateStaticDataMemberDeclarationOrNull(cast<VarDecl>(VD))); - DeclCache.insert(std::make_pair(VD->getCanonicalDecl(), llvm::WeakVH(GV))); + pair.first->second = llvm::WeakVH(GV); } llvm::DIScope CGDebugInfo::getCurrentContextDescriptor(const Decl *D) { @@ -3196,7 +3243,7 @@ void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) { // Emitting one decl is sufficient - debuggers can detect that this is an // overloaded name & provide lookup for all the overloads. const UsingShadowDecl &USD = **UD.shadow_begin(); - if (llvm::DIDescriptor Target = + if (llvm::DIScope Target = getDeclarationOrDefinition(USD.getUnderlyingDecl())) DBuilder.createImportedDeclaration( getCurrentContextDescriptor(cast<Decl>(USD.getDeclContext())), Target, @@ -3206,20 +3253,20 @@ void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) { llvm::DIImportedEntity CGDebugInfo::EmitNamespaceAlias(const NamespaceAliasDecl &NA) { if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo) - return llvm::DIImportedEntity(0); + return llvm::DIImportedEntity(nullptr); llvm::WeakVH &VH = NamespaceAliasCache[&NA]; if (VH) return llvm::DIImportedEntity(cast<llvm::MDNode>(VH)); - llvm::DIImportedEntity R(0); + llvm::DIImportedEntity R(nullptr); if (const NamespaceAliasDecl *Underlying = dyn_cast<NamespaceAliasDecl>(NA.getAliasedNamespace())) // This could cache & dedup here rather than relying on metadata deduping. - R = DBuilder.createImportedModule( + R = DBuilder.createImportedDeclaration( getCurrentContextDescriptor(cast<Decl>(NA.getDeclContext())), EmitNamespaceAlias(*Underlying), getLineNumber(NA.getLocation()), NA.getName()); else - R = DBuilder.createImportedModule( + R = DBuilder.createImportedDeclaration( getCurrentContextDescriptor(cast<Decl>(NA.getDeclContext())), getOrCreateNameSpace(cast<NamespaceDecl>(NA.getAliasedNamespace())), getLineNumber(NA.getLocation()), NA.getName()); @@ -3248,23 +3295,27 @@ CGDebugInfo::getOrCreateNameSpace(const NamespaceDecl *NSDecl) { } void CGDebugInfo::finalize() { - for (std::vector<std::pair<void *, llvm::WeakVH> >::const_iterator VI - = ReplaceMap.begin(), VE = ReplaceMap.end(); VI != VE; ++VI) { - llvm::DIType Ty, RepTy; - // Verify that the debug info still exists. - if (llvm::Value *V = VI->second) - Ty = llvm::DIType(cast<llvm::MDNode>(V)); - - llvm::DenseMap<void *, llvm::WeakVH>::iterator it = - TypeCache.find(VI->first); - if (it != TypeCache.end()) { - // Verify that the debug info still exists. - if (llvm::Value *V = it->second) - RepTy = llvm::DIType(cast<llvm::MDNode>(V)); - } - - if (Ty && Ty.isForwardDecl() && RepTy) - Ty.replaceAllUsesWith(RepTy); + // Creating types might create further types - invalidating the current + // element and the size(), so don't cache/reference them. + for (size_t i = 0; i != ObjCInterfaceCache.size(); ++i) { + ObjCInterfaceCacheEntry E = ObjCInterfaceCache[i]; + E.Decl.replaceAllUsesWith(CGM.getLLVMContext(), + E.Type->getDecl()->getDefinition() + ? 
CreateTypeDefinition(E.Type, E.Unit) + : E.Decl); + } + + for (auto p : ReplaceMap) { + assert(p.second); + llvm::DIType Ty(cast<llvm::MDNode>(p.second)); + assert(Ty.isForwardDecl()); + + auto it = TypeCache.find(p.first); + assert(it != TypeCache.end()); + assert(it->second); + + llvm::DIType RepTy(cast<llvm::MDNode>(it->second)); + Ty.replaceAllUsesWith(CGM.getLLVMContext(), RepTy); } // We keep our own list of retained types, because we need to look diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h index 0ca274f56881..fc3f434991fa 100644 --- a/lib/CodeGen/CGDebugInfo.h +++ b/lib/CodeGen/CGDebugInfo.h @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This is the source level debug info generator for llvm translation. +// This is the source-level debug info generator for llvm translation. // //===----------------------------------------------------------------------===// @@ -20,10 +20,10 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/DIBuilder.h" -#include "llvm/DebugInfo.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Support/Allocator.h" -#include "llvm/Support/ValueHandle.h" namespace llvm { class MDNode; @@ -47,8 +47,8 @@ namespace CodeGen { /// and is responsible for emitting to llvm globals or pass directly to /// the backend. class CGDebugInfo { - friend class NoLocation; friend class ArtificialLocation; + friend class SaveAndRestoreLocation; CodeGenModule &CGM; const CodeGenOptions::DebugInfoKind DebugKind; llvm::DIBuilder DBuilder; @@ -65,21 +65,27 @@ class CGDebugInfo { llvm::DIType BlockLiteralGeneric; /// TypeCache - Cache of previously constructed Types. - llvm::DenseMap<void *, llvm::WeakVH> TypeCache; + llvm::DenseMap<const void *, llvm::WeakVH> TypeCache; + + struct ObjCInterfaceCacheEntry { + const ObjCInterfaceType *Type; + llvm::DIType Decl; + llvm::DIFile Unit; + ObjCInterfaceCacheEntry(const ObjCInterfaceType *Type, llvm::DIType Decl, + llvm::DIFile Unit) + : Type(Type), Decl(Decl), Unit(Unit) {} + }; /// ObjCInterfaceCache - Cache of previously constructed interfaces - /// which may change. Storing a pair of DIType and checksum. - llvm::DenseMap<void *, std::pair<llvm::WeakVH, unsigned> > ObjCInterfaceCache; + /// which may change. + llvm::SmallVector<ObjCInterfaceCacheEntry, 32> ObjCInterfaceCache; /// RetainedTypes - list of interfaces we want to keep even if orphaned. std::vector<void *> RetainedTypes; - /// CompleteTypeCache - Cache of previously constructed complete RecordTypes. - llvm::DenseMap<void *, llvm::WeakVH> CompletedTypeCache; - /// ReplaceMap - Cache of forward declared types to RAUW at the end of /// compilation. - std::vector<std::pair<void *, llvm::WeakVH> >ReplaceMap; + std::vector<std::pair<const TagType *, llvm::WeakVH>> ReplaceMap; // LexicalBlockStack - Keep track of our current nested lexical block. 
std::vector<llvm::TrackingVH<llvm::MDNode> > LexicalBlockStack; @@ -109,6 +115,7 @@ class CGDebugInfo { llvm::DIType CreateType(const ComplexType *Ty); llvm::DIType CreateQualifiedType(QualType Ty, llvm::DIFile Fg); llvm::DIType CreateType(const TypedefType *Ty, llvm::DIFile Fg); + llvm::DIType CreateType(const TemplateSpecializationType *Ty, llvm::DIFile Fg); llvm::DIType CreateType(const ObjCObjectPointerType *Ty, llvm::DIFile F); llvm::DIType CreateType(const PointerType *Ty, llvm::DIFile F); @@ -119,6 +126,7 @@ class CGDebugInfo { llvm::DICompositeType CreateLimitedType(const RecordType *Ty); void CollectContainingType(const CXXRecordDecl *RD, llvm::DICompositeType CT); llvm::DIType CreateType(const ObjCInterfaceType *Ty, llvm::DIFile F); + llvm::DIType CreateTypeDefinition(const ObjCInterfaceType *Ty, llvm::DIFile F); llvm::DIType CreateType(const ObjCObjectType *Ty, llvm::DIFile F); llvm::DIType CreateType(const VectorType *Ty, llvm::DIFile F); llvm::DIType CreateType(const ArrayType *Ty, llvm::DIFile F); @@ -127,9 +135,9 @@ class CGDebugInfo { llvm::DIType CreateType(const MemberPointerType *Ty, llvm::DIFile F); llvm::DIType CreateType(const AtomicType *Ty, llvm::DIFile F); llvm::DIType CreateEnumType(const EnumType *Ty); + llvm::DIType CreateTypeDefinition(const EnumType *Ty); llvm::DIType CreateSelfType(const QualType &QualTy, llvm::DIType Ty); llvm::DIType getTypeOrNull(const QualType); - llvm::DIType getCompletedTypeOrNull(const QualType); llvm::DICompositeType getOrCreateMethodType(const CXXMethodDecl *Method, llvm::DIFile F); llvm::DICompositeType getOrCreateInstanceMethodType( @@ -139,7 +147,7 @@ class CGDebugInfo { llvm::DIType getOrCreateVTablePtrType(llvm::DIFile F); llvm::DINameSpace getOrCreateNameSpace(const NamespaceDecl *N); llvm::DIType getOrCreateTypeDeclaration(QualType PointeeTy, llvm::DIFile F); - llvm::DIType CreatePointerLikeType(unsigned Tag, + llvm::DIType CreatePointerLikeType(llvm::dwarf::Tag Tag, const Type *Ty, QualType PointeeTy, llvm::DIFile F); @@ -219,8 +227,12 @@ public: /// EmitFunctionStart - Emit a call to llvm.dbg.function.start to indicate /// start of a new function. - void EmitFunctionStart(GlobalDecl GD, QualType FnType, - llvm::Function *Fn, CGBuilderTy &Builder); + /// \param Loc The location of the function header. + /// \param ScopeLoc The location of the function body. + void EmitFunctionStart(GlobalDecl GD, + SourceLocation Loc, SourceLocation ScopeLoc, + QualType FnType, llvm::Function *Fn, + CGBuilderTy &Builder); /// EmitFunctionEnd - Constructs the debug code for exiting a function. void EmitFunctionEnd(CGBuilderTy &Builder); @@ -261,9 +273,6 @@ public: /// EmitGlobalVariable - Emit information about a global variable. void EmitGlobalVariable(llvm::GlobalVariable *GV, const VarDecl *Decl); - /// EmitGlobalVariable - Emit information about an objective-c interface. - void EmitGlobalVariable(llvm::GlobalVariable *GV, ObjCInterfaceDecl *Decl); - /// EmitGlobalVariable - Emit global variable's debug info. void EmitGlobalVariable(const ValueDecl *VD, llvm::Constant *Init); @@ -284,14 +293,19 @@ public: llvm::DIType getOrCreateInterfaceType(QualType Ty, SourceLocation Loc); + void completeType(const EnumDecl *ED); void completeType(const RecordDecl *RD); void completeRequiredType(const RecordDecl *RD); void completeClassData(const RecordDecl *RD); + void completeTemplateDefinition(const ClassTemplateSpecializationDecl &SD); + private: /// EmitDeclare - Emit call to llvm.dbg.declare for a variable declaration. 
- void EmitDeclare(const VarDecl *decl, unsigned Tag, llvm::Value *AI, - unsigned ArgNo, CGBuilderTy &Builder); + /// Tag accepts custom types DW_TAG_arg_variable and DW_TAG_auto_variable, + /// otherwise it would be of type llvm::dwarf::Tag. + void EmitDeclare(const VarDecl *decl, llvm::dwarf::LLVMConstants Tag, + llvm::Value *AI, unsigned ArgNo, CGBuilderTy &Builder); // EmitTypeForVarWithBlocksAttr - Build up structure info for the byref. // See BuildByRefType. @@ -342,9 +356,9 @@ private: llvm::DIType CreateMemberType(llvm::DIFile Unit, QualType FType, StringRef Name, uint64_t *Offset); - /// \brief Retrieve the DIDescriptor, if any, for the canonical form of this + /// \brief Retrieve the DIScope, if any, for the canonical form of this /// declaration. - llvm::DIDescriptor getDeclarationOrDefinition(const Decl *D); + llvm::DIScope getDeclarationOrDefinition(const Decl *D); /// getFunctionDeclaration - Return debug info descriptor to describe method /// declaration for the given method definition. @@ -355,6 +369,13 @@ private: llvm::DIDerivedType getOrCreateStaticDataMemberDeclarationOrNull(const VarDecl *D); + /// Return a global variable that represents one of the collection of + /// global variables created for an anonymous union. + llvm::DIGlobalVariable + CollectAnonRecordDecls(const RecordDecl *RD, llvm::DIFile Unit, unsigned LineNo, + StringRef LinkageName, llvm::GlobalVariable *Var, + llvm::DIDescriptor DContext); + /// getFunctionName - Get function name for the given FunctionDecl. If the /// name is constructed on demand (e.g. C++ destructor) then the name /// is stored on the side. @@ -394,16 +415,26 @@ private: } }; -/// NoLocation - An RAII object that temporarily disables debug -/// locations. This is useful for emitting instructions that should be -/// counted towards the function prologue. -class NoLocation { +/// SaveAndRestoreLocation - An RAII object that saves the current location +/// and automatically restores it to the original value. +class SaveAndRestoreLocation { +protected: SourceLocation SavedLoc; CGDebugInfo *DI; CGBuilderTy &Builder; public: + SaveAndRestoreLocation(CodeGenFunction &CGF, CGBuilderTy &B); + /// Autorestore everything back to normal. + ~SaveAndRestoreLocation(); +}; + +/// NoLocation - An RAII object that temporarily disables debug +/// locations. This is useful for emitting instructions that should be +/// counted towards the function prologue. +class NoLocation : public SaveAndRestoreLocation { +public: NoLocation(CodeGenFunction &CGF, CGBuilderTy &B); - /// ~NoLocation - Autorestore everything back to normal. + /// Autorestore everything back to normal. ~NoLocation(); }; @@ -418,10 +449,7 @@ public: /// This is necessary because passing an empty SourceLocation to /// CGDebugInfo::setLocation() will result in the last valid location /// being reused. -class ArtificialLocation { - SourceLocation SavedLoc; - CGDebugInfo *DI; - CGBuilderTy &Builder; +class ArtificialLocation : public SaveAndRestoreLocation { public: ArtificialLocation(CodeGenFunction &CGF, CGBuilderTy &B); @@ -429,7 +457,7 @@ public: /// (= the top of the LexicalBlockStack). void Emit(); - /// ~ArtificialLocation - Autorestore everything back to normal. + /// Autorestore everything back to normal.
~ArtificialLocation(); }; diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp index 66d6b33eb6f0..91f804193049 100644 --- a/lib/CodeGen/CGDecl.cpp +++ b/lib/CodeGen/CGDecl.cpp @@ -126,17 +126,11 @@ void CodeGenFunction::EmitDecl(const Decl &D) { void CodeGenFunction::EmitVarDecl(const VarDecl &D) { if (D.isStaticLocal()) { llvm::GlobalValue::LinkageTypes Linkage = - llvm::GlobalValue::InternalLinkage; + CGM.getLLVMLinkageVarDefinition(&D, /*isConstant=*/false); - // If the variable is externally visible, it must have weak linkage so it - // can be uniqued. - if (D.isExternallyVisible()) { - Linkage = llvm::GlobalValue::LinkOnceODRLinkage; - - // FIXME: We need to force the emission/use of a guard variable for - // some variables even if we can constant-evaluate them because - // we can't guarantee every translation unit will constant-evaluate them. - } + // FIXME: We need to force the emission/use of a guard variable for + // some variables even if we can constant-evaluate them because + // we can't guarantee every translation unit will constant-evaluate them. return EmitStaticVarDecl(D, Linkage); } @@ -155,35 +149,30 @@ void CodeGenFunction::EmitVarDecl(const VarDecl &D) { static std::string GetStaticDeclName(CodeGenFunction &CGF, const VarDecl &D, const char *Separator) { CodeGenModule &CGM = CGF.CGM; - if (CGF.getLangOpts().CPlusPlus) { - StringRef Name = CGM.getMangledName(&D); - return Name.str(); - } - std::string ContextName; + if (CGF.getLangOpts().CPlusPlus) + return CGM.getMangledName(&D).str(); + + StringRef ContextName; if (!CGF.CurFuncDecl) { // Better be in a block declared in global scope. const NamedDecl *ND = cast<NamedDecl>(&D); const DeclContext *DC = ND->getDeclContext(); - if (const BlockDecl *BD = dyn_cast<BlockDecl>(DC)) { - MangleBuffer Name; - CGM.getBlockMangledName(GlobalDecl(), Name, BD); - ContextName = Name.getString(); - } + if (const BlockDecl *BD = dyn_cast<BlockDecl>(DC)) + ContextName = CGM.getBlockMangledName(GlobalDecl(), BD); else llvm_unreachable("Unknown context for block static var decl"); - } else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(CGF.CurFuncDecl)) { - StringRef Name = CGM.getMangledName(FD); - ContextName = Name.str(); - } else if (isa<ObjCMethodDecl>(CGF.CurFuncDecl)) + } else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(CGF.CurFuncDecl)) + ContextName = CGM.getMangledName(FD); + else if (isa<ObjCMethodDecl>(CGF.CurFuncDecl)) ContextName = CGF.CurFn->getName(); else llvm_unreachable("Unknown context for static var decl"); - return ContextName + Separator + D.getNameAsString(); + return ContextName.str() + Separator + D.getNameAsString(); } -llvm::GlobalVariable * +llvm::Constant * CodeGenFunction::CreateStaticVarDecl(const VarDecl &D, const char *Separator, llvm::GlobalValue::LinkageTypes Linkage) { @@ -203,7 +192,7 @@ CodeGenFunction::CreateStaticVarDecl(const VarDecl &D, llvm::GlobalVariable *GV = new llvm::GlobalVariable(CGM.getModule(), LTy, Ty.isConstant(getContext()), Linkage, - CGM.EmitNullConstant(D.getType()), Name, 0, + CGM.EmitNullConstant(D.getType()), Name, nullptr, llvm::GlobalVariable::NotThreadLocal, AddrSpace); GV->setAlignment(getContext().getDeclAlign(&D).getQuantity()); @@ -212,6 +201,20 @@ CodeGenFunction::CreateStaticVarDecl(const VarDecl &D, if (D.getTLSKind()) CGM.setTLSMode(GV, D); + if (D.isExternallyVisible()) { + if (D.hasAttr<DLLImportAttr>()) + GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass); + else if (D.hasAttr<DLLExportAttr>()) + 
GV->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass); + } + + // Make sure the result is of the correct type. + unsigned ExpectedAddrSpace = CGM.getContext().getTargetAddressSpace(Ty); + if (AddrSpace != ExpectedAddrSpace) { + llvm::PointerType *PTy = llvm::PointerType::get(LTy, ExpectedAddrSpace); + return llvm::ConstantExpr::getAddrSpaceCast(GV, PTy); + } + return GV; } @@ -290,7 +293,7 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D, void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D, llvm::GlobalValue::LinkageTypes Linkage) { llvm::Value *&DMEntry = LocalDeclMap[&D]; - assert(DMEntry == 0 && "Decl already exists in localdeclmap!"); + assert(!DMEntry && "Decl already exists in localdeclmap!"); // Check to see if we already have a global variable for this // declaration. This can happen when double-emitting function @@ -298,12 +301,8 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D, llvm::Constant *addr = CGM.getStaticLocalDeclAddress(&D); - llvm::GlobalVariable *var; - if (addr) { - var = cast<llvm::GlobalVariable>(addr->stripPointerCasts()); - } else { - addr = var = CreateStaticVarDecl(D, ".", Linkage); - } + if (!addr) + addr = CreateStaticVarDecl(D, ".", Linkage); // Store into LocalDeclMap before generating initializer to handle // circular references. @@ -319,6 +318,8 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D, // Save the type in case adding the initializer forces a type change. llvm::Type *expectedType = addr->getType(); + llvm::GlobalVariable *var = + cast<llvm::GlobalVariable>(addr->stripPointerCasts()); // If this value has an initializer, emit it. if (D.getInit()) var = AddInitializerToStaticVarDecl(D, var); @@ -332,17 +333,20 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D, var->setSection(SA->getName()); if (D.hasAttr<UsedAttr>()) - CGM.AddUsedGlobal(var); + CGM.addUsedGlobal(var); // We may have to cast the constant because of the initializer // mismatch above. // // FIXME: It is really dangerous to store this in the map; if anyone // RAUW's the GV uses of this constant will be invalid. - llvm::Constant *castedAddr = llvm::ConstantExpr::getBitCast(var, expectedType); + llvm::Constant *castedAddr = + llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(var, expectedType); DMEntry = castedAddr; CGM.setStaticLocalDeclAddress(&D, castedAddr); + CGM.reportGlobalToASan(var, D); + // Emit global variable debug descriptor for static vars. CGDebugInfo *DI = getDebugInfo(); if (DI && @@ -365,7 +369,7 @@ namespace { CodeGenFunction::Destroyer *destroyer; bool useEHCleanupForArray; - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { // Don't use an EH cleanup recursively from an EH cleanup. bool useEHCleanupForArray = flags.isForNormalCleanup() && this->useEHCleanupForArray; @@ -384,11 +388,11 @@ namespace { llvm::Value *NRVOFlag; llvm::Value *Loc; - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { // Along the exceptions path we always execute the dtor. bool NRVO = flags.isForNormalCleanup() && NRVOFlag; - llvm::BasicBlock *SkipDtorBB = 0; + llvm::BasicBlock *SkipDtorBB = nullptr; if (NRVO) { // If we exited via NRVO, we skip the destructor call. 
llvm::BasicBlock *RunDtorBB = CGF.createBasicBlock("nrvo.unused"); @@ -410,7 +414,7 @@ namespace { struct CallStackRestore : EHScopeStack::Cleanup { llvm::Value *Stack; CallStackRestore(llvm::Value *Stack) : Stack(Stack) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { llvm::Value *V = CGF.Builder.CreateLoad(Stack); llvm::Value *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore); CGF.Builder.CreateCall(F, V); @@ -421,7 +425,7 @@ namespace { const VarDecl &Var; ExtendGCLifetime(const VarDecl *var) : Var(*var) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { // Compute the address of the local variable, in case it's a // byref or something. DeclRefExpr DRE(const_cast<VarDecl*>(&Var), false, @@ -441,7 +445,7 @@ namespace { const VarDecl *Var) : CleanupFn(CleanupFn), FnInfo(*Info), Var(*Var) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { DeclRefExpr DRE(const_cast<VarDecl*>(&Var), false, Var.getType(), VK_LValue, SourceLocation()); // Compute the address of the local variable, in case it's a byref @@ -473,7 +477,7 @@ namespace { CallLifetimeEnd(llvm::Value *addr, llvm::Value *size) : Addr(addr), Size(size) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { llvm::Value *castAddr = CGF.Builder.CreateBitCast(Addr, CGF.Int8PtrTy); CGF.Builder.CreateCall2(CGF.CGM.getLLVMLifetimeEndFn(), Size, castAddr) @@ -530,9 +534,8 @@ static bool isAccessedBy(const VarDecl &var, const Stmt *s) { return (ref->getDecl() == &var); if (const BlockExpr *be = dyn_cast<BlockExpr>(e)) { const BlockDecl *block = be->getBlockDecl(); - for (BlockDecl::capture_const_iterator i = block->capture_begin(), - e = block->capture_end(); i != e; ++i) { - if (i->getVariable() == &var) + for (const auto &I : block->captures()) { + if (I.getVariable() == &var) return true; } } @@ -571,7 +574,10 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, EmitStoreThroughLValue(RValue::get(value), lvalue, true); return; } - + + if (const CXXDefaultInitExpr *DIE = dyn_cast<CXXDefaultInitExpr>(init)) + init = DIE->getExpr(); + // If we're emitting a value with lifetime, we have to do the // initialization *before* we leave the cleanup scopes. if (const ExprWithCleanups *ewc = dyn_cast<ExprWithCleanups>(init)) { @@ -613,7 +619,7 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, } // Emit the initializer. - llvm::Value *value = 0; + llvm::Value *value = nullptr; switch (lifetime) { case Qualifiers::OCL_None: @@ -798,9 +804,6 @@ static bool shouldUseMemSetPlusStoresToInitialize(llvm::Constant *Init, /// Should we use the LLVM lifetime intrinsics for the given local variable? static bool shouldUseLifetimeMarkers(CodeGenFunction &CGF, const VarDecl &D, unsigned Size) { - // Always emit lifetime markers in -fsanitize=use-after-scope mode. - if (CGF.getLangOpts().Sanitize.UseAfterScope) - return true; // For now, only in optimized builds. if (CGF.CGM.getCodeGenOpts().OptimizationLevel == 0) return false; @@ -823,7 +826,7 @@ void CodeGenFunction::EmitAutoVarDecl(const VarDecl &D) { } /// EmitAutoVarAlloca - Emit the alloca and debug information for a -/// local variable. Does not emit initalization or destruction. +/// local variable. Does not emit initialization or destruction. 
CodeGenFunction::AutoVarEmission CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { QualType Ty = D.getType(); @@ -866,7 +869,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { CGM.isTypeConstant(Ty, true)) { EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage); - emission.Address = 0; // signal this condition to later callbacks + emission.Address = nullptr; // signal this condition to later callbacks assert(emission.wasEmittedAsGlobal()); return emission; } @@ -944,12 +947,12 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // Push a cleanup block and restore the stack there. // FIXME: in general circumstances, this should be an EH cleanup. - EHStack.pushCleanup<CallStackRestore>(NormalCleanup, Stack); + pushStackRestore(NormalCleanup, Stack); } llvm::Value *elementCount; QualType elementType; - llvm::tie(elementCount, elementType) = getVLASize(Ty); + std::tie(elementCount, elementType) = getVLASize(Ty); llvm::Type *llvmTy = ConvertTypeForMem(elementType); @@ -961,7 +964,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { } llvm::Value *&DMEntry = LocalDeclMap[&D]; - assert(DMEntry == 0 && "Decl already exists in localdeclmap!"); + assert(!DMEntry && "Decl already exists in localdeclmap!"); DMEntry = DeclPtr; emission.Address = DeclPtr; @@ -990,9 +993,8 @@ static bool isCapturedBy(const VarDecl &var, const Expr *e) { if (const BlockExpr *be = dyn_cast<BlockExpr>(e)) { const BlockDecl *block = be->getBlockDecl(); - for (BlockDecl::capture_const_iterator i = block->capture_begin(), - e = block->capture_end(); i != e; ++i) { - if (i->getVariable() == &var) + for (const auto &I : block->captures()) { + if (I.getVariable() == &var) return true; } @@ -1002,18 +1004,16 @@ static bool isCapturedBy(const VarDecl &var, const Expr *e) { if (const StmtExpr *SE = dyn_cast<StmtExpr>(e)) { const CompoundStmt *CS = SE->getSubStmt(); - for (CompoundStmt::const_body_iterator BI = CS->body_begin(), - BE = CS->body_end(); BI != BE; ++BI) - if (Expr *E = dyn_cast<Expr>((*BI))) { + for (const auto *BI : CS->body()) + if (const auto *E = dyn_cast<Expr>(BI)) { if (isCapturedBy(var, E)) return true; } - else if (DeclStmt *DS = dyn_cast<DeclStmt>((*BI))) { + else if (const auto *DS = dyn_cast<DeclStmt>(BI)) { // special case declarations - for (DeclStmt::decl_iterator I = DS->decl_begin(), E = DS->decl_end(); - I != E; ++I) { - if (VarDecl *VD = dyn_cast<VarDecl>((*I))) { - Expr *Init = VD->getInit(); + for (const auto *I : DS->decls()) { + if (const auto *VD = dyn_cast<VarDecl>((I))) { + const Expr *Init = VD->getInit(); if (Init && isCapturedBy(var, Init)) return true; } @@ -1084,7 +1084,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { llvm::Value *Loc = capturedByInit ? 
emission.Address : emission.getObjectAddress(*this); - llvm::Constant *constant = 0; + llvm::Constant *constant = nullptr; if (emission.IsConstantAggregate || D.isConstexpr()) { assert(!capturedByInit && "constant init contains a capturing block?"); constant = CGM.EmitConstantInit(D, this); @@ -1211,7 +1211,7 @@ void CodeGenFunction::emitAutoVarTypeCleanup( QualType type = var->getType(); CleanupKind cleanupKind = NormalAndEHCleanup; - CodeGenFunction::Destroyer *destroyer = 0; + CodeGenFunction::Destroyer *destroyer = nullptr; switch (dtorKind) { case QualType::DK_none: @@ -1344,6 +1344,10 @@ void CodeGenFunction::pushDestroy(CleanupKind cleanupKind, llvm::Value *addr, destroyer, useEHCleanupForArray); } +void CodeGenFunction::pushStackRestore(CleanupKind Kind, llvm::Value *SPMem) { + EHStack.pushCleanup<CallStackRestore>(Kind, SPMem); +} + void CodeGenFunction::pushLifetimeExtendedDestroy( CleanupKind cleanupKind, llvm::Value *addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray) { @@ -1505,7 +1509,7 @@ namespace { : ArrayBegin(arrayBegin), ArrayEnd(arrayEnd), ElementType(elementType), Destroyer(destroyer) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { emitPartialArrayDestroy(CGF, ArrayBegin, ArrayEnd, ElementType, Destroyer); } @@ -1527,7 +1531,7 @@ namespace { : ArrayBegin(arrayBegin), ArrayEndPointer(arrayEndPointer), ElementType(elementType), Destroyer(destroyer) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { llvm::Value *arrayEnd = CGF.Builder.CreateLoad(ArrayEndPointer); emitPartialArrayDestroy(CGF, ArrayBegin, arrayEnd, ElementType, Destroyer); @@ -1594,7 +1598,7 @@ namespace { llvm::Value *Param; ARCPreciseLifetime_t Precise; - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { CGF.EmitARCRelease(Param, Precise); } }; @@ -1603,7 +1607,7 @@ namespace { /// Emit an alloca (or GlobalValue depending on target) /// for the specified parameter and set up LocalDeclMap. void CodeGenFunction::EmitParmDecl(const VarDecl &D, llvm::Value *Arg, - unsigned ArgNo) { + bool ArgIsPointer, unsigned ArgNo) { // FIXME: Why isn't ImplicitParamDecl a ParmVarDecl? assert((isa<ParmVarDecl>(D) || isa<ImplicitParamDecl>(D)) && "Invalid argument to EmitParmDecl"); @@ -1617,7 +1621,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, llvm::Value *Arg, // The only implicit argument a block has is its literal. if (BlockInfo) { LocalDeclMap[&D] = Arg; - llvm::Value *LocalAddr = 0; + llvm::Value *LocalAddr = nullptr; if (CGM.getCodeGenOpts().OptimizationLevel == 0) { // Allocate a stack slot to let the debug info survive the RA. llvm::AllocaInst *Alloc = CreateTempAlloca(ConvertTypeForMem(Ty), @@ -1641,30 +1645,35 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, llvm::Value *Arg, } llvm::Value *DeclPtr; - bool HasNonScalarEvalKind = !CodeGenFunction::hasScalarEvaluationKind(Ty); - // If this is an aggregate or variable sized value, reuse the input pointer. - if (HasNonScalarEvalKind || !Ty->isConstantSizeType()) { - DeclPtr = Arg; + bool DoStore = false; + bool IsScalar = hasScalarEvaluationKind(Ty); + CharUnits Align = getContext().getDeclAlign(&D); + // If we already have a pointer to the argument, reuse the input pointer. + if (ArgIsPointer) { + // If we have a prettier pointer type at this point, bitcast to that. 
+ unsigned AS = cast<llvm::PointerType>(Arg->getType())->getAddressSpace(); + llvm::Type *IRTy = ConvertTypeForMem(Ty)->getPointerTo(AS); + DeclPtr = Arg->getType() == IRTy ? Arg : Builder.CreateBitCast(Arg, IRTy, + D.getName()); // Push a destructor cleanup for this parameter if the ABI requires it. - if (HasNonScalarEvalKind && - getTarget().getCXXABI().isArgumentDestroyedByCallee()) { - if (const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl()) { - if (RD->hasNonTrivialDestructor()) - pushDestroy(QualType::DK_cxx_destructor, DeclPtr, Ty); - } + if (!IsScalar && + getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) { + const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); + if (RD && RD->hasNonTrivialDestructor()) + pushDestroy(QualType::DK_cxx_destructor, DeclPtr, Ty); } } else { // Otherwise, create a temporary to hold the value. llvm::AllocaInst *Alloc = CreateTempAlloca(ConvertTypeForMem(Ty), D.getName() + ".addr"); - CharUnits Align = getContext().getDeclAlign(&D); Alloc->setAlignment(Align.getQuantity()); DeclPtr = Alloc; + DoStore = true; + } - bool doStore = true; - + LValue lv = MakeAddrLValue(DeclPtr, Ty, Align); + if (IsScalar) { Qualifiers qs = Ty.getQualifiers(); - LValue lv = MakeAddrLValue(DeclPtr, Ty, Align); if (Qualifiers::ObjCLifetime lt = qs.getObjCLifetime()) { // We honor __attribute__((ns_consumed)) for types with lifetime. // For __strong, it's handled by just skipping the initial retain; @@ -1693,7 +1702,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, llvm::Value *Arg, llvm::Value *Null = CGM.EmitNullConstant(D.getType()); EmitStoreOfScalar(Null, lv, /* isInitialization */ true); EmitARCStoreStrongCall(lv.getAddress(), Arg, true); - doStore = false; + DoStore = false; } else // Don't use objc_retainBlock for block pointers, because we @@ -1712,21 +1721,21 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, llvm::Value *Arg, if (lt == Qualifiers::OCL_Weak) { EmitARCInitWeak(DeclPtr, Arg); - doStore = false; // The weak init is a store, no need to do two. + DoStore = false; // The weak init is a store, no need to do two. } } // Enter the cleanup scope. EmitAutoVarWithLifetime(*this, D, DeclPtr, lt); } - - // Store the initial value into the alloca. - if (doStore) - EmitStoreOfScalar(Arg, lv, /* isInitialization */ true); } + // Store the initial value into the alloca. + if (DoStore) + EmitStoreOfScalar(Arg, lv, /* isInitialization */ true); + llvm::Value *&DMEntry = LocalDeclMap[&D]; - assert(DMEntry == 0 && "Decl already exists in localdeclmap!"); + assert(!DMEntry && "Decl already exists in localdeclmap!"); DMEntry = DeclPtr; // Emit debug info for param declaration. diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp index 7bdb9eb0a4a6..94cfe211601f 100644 --- a/lib/CodeGen/CGDeclCXX.cpp +++ b/lib/CodeGen/CGDeclCXX.cpp @@ -17,6 +17,7 @@ #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/Support/Path.h" using namespace clang; using namespace CodeGen; @@ -89,7 +90,7 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, // Special-case non-array C++ destructors, where there's a function // with the right signature that we can just call. 
- const CXXRecordDecl *record = 0; + const CXXRecordDecl *record = nullptr; if (dtorKind == QualType::DK_cxx_destructor && (record = type->getAsCXXRecordDecl())) { assert(!record->hasTrivialDestructor()); @@ -178,8 +179,7 @@ static llvm::Constant *createAtExitStub(CodeGenModule &CGM, const VarDecl &VD, CodeGenFunction CGF(CGM); CGF.StartFunction(&VD, CGM.getContext().VoidTy, fn, - CGM.getTypes().arrangeNullaryFunction(), FunctionArgList(), - SourceLocation()); + CGM.getTypes().arrangeNullaryFunction(), FunctionArgList()); llvm::CallInst *call = CGF.Builder.CreateCall(dtor, addr); @@ -245,16 +245,44 @@ CreateGlobalInitOrDestructFunction(CodeGenModule &CGM, if (!CGM.getLangOpts().Exceptions) Fn->setDoesNotThrow(); - if (CGM.getSanOpts().Address) - Fn->addFnAttr(llvm::Attribute::SanitizeAddress); - if (CGM.getSanOpts().Thread) - Fn->addFnAttr(llvm::Attribute::SanitizeThread); - if (CGM.getSanOpts().Memory) - Fn->addFnAttr(llvm::Attribute::SanitizeMemory); + if (!CGM.getSanitizerBlacklist().isIn(*Fn)) { + if (CGM.getLangOpts().Sanitize.Address) + Fn->addFnAttr(llvm::Attribute::SanitizeAddress); + if (CGM.getLangOpts().Sanitize.Thread) + Fn->addFnAttr(llvm::Attribute::SanitizeThread); + if (CGM.getLangOpts().Sanitize.Memory) + Fn->addFnAttr(llvm::Attribute::SanitizeMemory); + } return Fn; } +/// Create a global pointer to a function that will initialize a global +/// variable. The user has requested that this pointer be emitted in a specific +/// section. +void CodeGenModule::EmitPointerToInitFunc(const VarDecl *D, + llvm::GlobalVariable *GV, + llvm::Function *InitFunc, + InitSegAttr *ISA) { + llvm::GlobalVariable *PtrArray = new llvm::GlobalVariable( + TheModule, InitFunc->getType(), /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, InitFunc, "__cxx_init_fn_ptr"); + PtrArray->setSection(ISA->getSection()); + addUsedGlobal(PtrArray); + + // If the GV is already in a comdat group, then we have to join it. + llvm::Comdat *C = GV->getComdat(); + + // LinkOnce and Weak linkage are lowered down to a single-member comdat group. + // Make an explicit group so we can join it. + if (!C && (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage())) { + C = TheModule.getOrInsertComdat(GV->getName()); + GV->setComdat(C); + } + if (C) + PtrArray->setComdat(C); +} + void CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, llvm::GlobalVariable *Addr, @@ -270,9 +298,9 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, llvm::Function *Fn = CreateGlobalInitOrDestructFunction(*this, FTy, FnName.str()); + auto *ISA = D->getAttr<InitSegAttr>(); CodeGenFunction(*this).GenerateCXXGlobalVarDeclInitFunc(Fn, D, Addr, PerformInit); - if (D->getTLSKind()) { // FIXME: Should we support init_priority for thread_local? // FIXME: Ideally, initialization of instantiated thread_local static data @@ -281,9 +309,11 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, // FIXME: We only need to register one __cxa_thread_atexit function for the // entire TU. 
CXXThreadLocalInits.push_back(Fn); - } else if (D->hasAttr<InitPriorityAttr>()) { - unsigned int order = D->getAttr<InitPriorityAttr>()->getPriority(); - OrderGlobalInits Key(order, PrioritizedCXXGlobalInits.size()); + } else if (PerformInit && ISA) { + EmitPointerToInitFunc(D, Addr, Fn, ISA); + DelayedCXXInitPosition.erase(D); + } else if (auto *IPA = D->getAttr<InitPriorityAttr>()) { + OrderGlobalInits Key(IPA->getPriority(), PrioritizedCXXGlobalInits.size()); PrioritizedCXXGlobalInits.push_back(std::make_pair(Key, Fn)); DelayedCXXInitPosition.erase(D); } else if (D->getTemplateSpecializationKind() != TSK_ExplicitSpecialization && @@ -295,10 +325,12 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, // have unordered initialization. // // As a consequence, we can put them into their own llvm.global_ctors entry. - // This should allow GlobalOpt to fire more often, and allow us to implement - // the Microsoft C++ ABI, which uses COMDAT elimination to avoid double - // initializaiton. - AddGlobalCtor(Fn); + // + // In addition, put the initializer into a COMDAT group with the global + // being initialized. On most platforms, this is a minor startup time + // optimization. In the MS C++ ABI, there are no guard variables, so this + // COMDAT key is required for correctness. + AddGlobalCtor(Fn, 65535, Addr); DelayedCXXInitPosition.erase(D); } else { llvm::DenseMap<const Decl *, unsigned>::iterator I = @@ -306,7 +338,7 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, if (I == DelayedCXXInitPosition.end()) { CXXGlobalInits.push_back(Fn); } else { - assert(CXXGlobalInits[I->second] == 0); + assert(CXXGlobalInits[I->second] == nullptr); CXXGlobalInits[I->second] = Fn; DelayedCXXInitPosition.erase(I); } @@ -314,7 +346,7 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, } void CodeGenModule::EmitCXXThreadLocalInitFunc() { - llvm::Function *InitFn = 0; + llvm::Function *InitFn = nullptr; if (!CXXThreadLocalInits.empty()) { // Generate a guarded initialization function. llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); @@ -364,7 +396,7 @@ CodeGenModule::EmitCXXGlobalInitFunc() { // Compute the function suffix from priority. Prepend with zeroes to make // sure the function names are also ordered as priorities. std::string PrioritySuffix = llvm::utostr(Priority); - // Priority is always <= 65535 (enforced by sema).. + // Priority is always <= 65535 (enforced by sema). PrioritySuffix = std::string(6-PrioritySuffix.size(), '0')+PrioritySuffix; llvm::Function *Fn = CreateGlobalInitOrDestructFunction(*this, FTy, @@ -378,8 +410,20 @@ CodeGenModule::EmitCXXGlobalInitFunc() { } } - llvm::Function *Fn = - CreateGlobalInitOrDestructFunction(*this, FTy, "_GLOBAL__I_a"); + // Include the filename in the symbol name. Including "sub_" matches gcc and + // makes sure these symbols appear lexicographically behind the symbols with + // priority emitted above. + SourceManager &SM = Context.getSourceManager(); + SmallString<128> FileName(llvm::sys::path::filename( + SM.getFileEntryForID(SM.getMainFileID())->getName())); + for (size_t i = 0; i < FileName.size(); ++i) { + // Replace everything that's not [a-zA-Z0-9._] with a _. This set happens + // to be the set of C preprocessing numbers. 
+ if (!isPreprocessingNumberBody(FileName[i])) + FileName[i] = '_'; + } + llvm::Function *Fn = CreateGlobalInitOrDestructFunction( + *this, FTy, llvm::Twine("_GLOBAL__sub_I_", FileName)); CodeGenFunction(*this).GenerateCXXGlobalInitFunc(Fn, CXXGlobalInits); AddGlobalCtor(Fn); @@ -409,17 +453,17 @@ void CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn, bool PerformInit) { // Check if we need to emit debug info for variable initializer. if (D->hasAttr<NoDebugAttr>()) - DebugInfo = NULL; // disable debug info indefinitely for this function + DebugInfo = nullptr; // disable debug info indefinitely for this function StartFunction(GlobalDecl(D), getContext().VoidTy, Fn, getTypes().arrangeNullaryFunction(), - FunctionArgList(), D->getInit()->getExprLoc()); + FunctionArgList(), D->getLocation(), + D->getInit()->getExprLoc()); // Use guarded initialization if the global variable is weak. This // occurs for, e.g., instantiated static data members and // definitions explicitly marked weak. - if (Addr->getLinkage() == llvm::GlobalValue::WeakODRLinkage || - Addr->getLinkage() == llvm::GlobalValue::WeakAnyLinkage) { + if (Addr->hasWeakLinkage() || Addr->hasLinkOnceLinkage()) { EmitCXXGuardedInit(*D, Addr, PerformInit); } else { EmitCXXGlobalVarDeclInit(*D, Addr, PerformInit); @@ -432,44 +476,49 @@ void CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, ArrayRef<llvm::Constant *> Decls, llvm::GlobalVariable *Guard) { - StartFunction(GlobalDecl(), getContext().VoidTy, Fn, - getTypes().arrangeNullaryFunction(), - FunctionArgList(), SourceLocation()); - - llvm::BasicBlock *ExitBlock = 0; - if (Guard) { - // If we have a guard variable, check whether we've already performed these - // initializations. This happens for TLS initialization functions. - llvm::Value *GuardVal = Builder.CreateLoad(Guard); - llvm::Value *Uninit = Builder.CreateIsNull(GuardVal, "guard.uninitialized"); - // Mark as initialized before initializing anything else. If the - // initializers use previously-initialized thread_local vars, that's - // probably supposed to be OK, but the standard doesn't say. - Builder.CreateStore(llvm::ConstantInt::get(GuardVal->getType(), 1), Guard); - llvm::BasicBlock *InitBlock = createBasicBlock("init"); - ExitBlock = createBasicBlock("exit"); - Builder.CreateCondBr(Uninit, InitBlock, ExitBlock); - EmitBlock(InitBlock); - } + { + ArtificialLocation AL(*this, Builder); + StartFunction(GlobalDecl(), getContext().VoidTy, Fn, + getTypes().arrangeNullaryFunction(), FunctionArgList()); + // Emit an artificial location for this function. + AL.Emit(); + + llvm::BasicBlock *ExitBlock = nullptr; + if (Guard) { + // If we have a guard variable, check whether we've already performed + // these initializations. This happens for TLS initialization functions. + llvm::Value *GuardVal = Builder.CreateLoad(Guard); + llvm::Value *Uninit = Builder.CreateIsNull(GuardVal, + "guard.uninitialized"); + // Mark as initialized before initializing anything else. If the + // initializers use previously-initialized thread_local vars, that's + // probably supposed to be OK, but the standard doesn't say. 
+ Builder.CreateStore(llvm::ConstantInt::get(GuardVal->getType(),1), Guard); + llvm::BasicBlock *InitBlock = createBasicBlock("init"); + ExitBlock = createBasicBlock("exit"); + Builder.CreateCondBr(Uninit, InitBlock, ExitBlock); + EmitBlock(InitBlock); + } - RunCleanupsScope Scope(*this); + RunCleanupsScope Scope(*this); - // When building in Objective-C++ ARC mode, create an autorelease pool - // around the global initializers. - if (getLangOpts().ObjCAutoRefCount && getLangOpts().CPlusPlus) { - llvm::Value *token = EmitObjCAutoreleasePoolPush(); - EmitObjCAutoreleasePoolCleanup(token); - } + // When building in Objective-C++ ARC mode, create an autorelease pool + // around the global initializers. + if (getLangOpts().ObjCAutoRefCount && getLangOpts().CPlusPlus) { + llvm::Value *token = EmitObjCAutoreleasePoolPush(); + EmitObjCAutoreleasePoolCleanup(token); + } - for (unsigned i = 0, e = Decls.size(); i != e; ++i) - if (Decls[i]) - EmitRuntimeCall(Decls[i]); + for (unsigned i = 0, e = Decls.size(); i != e; ++i) + if (Decls[i]) + EmitRuntimeCall(Decls[i]); - Scope.ForceCleanup(); + Scope.ForceCleanup(); - if (ExitBlock) { - Builder.CreateBr(ExitBlock); - EmitBlock(ExitBlock); + if (ExitBlock) { + Builder.CreateBr(ExitBlock); + EmitBlock(ExitBlock); + } } FinishFunction(); @@ -478,18 +527,22 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, void CodeGenFunction::GenerateCXXGlobalDtorsFunc(llvm::Function *Fn, const std::vector<std::pair<llvm::WeakVH, llvm::Constant*> > &DtorsAndObjects) { - StartFunction(GlobalDecl(), getContext().VoidTy, Fn, - getTypes().arrangeNullaryFunction(), - FunctionArgList(), SourceLocation()); - - // Emit the dtors, in reverse order from construction. - for (unsigned i = 0, e = DtorsAndObjects.size(); i != e; ++i) { - llvm::Value *Callee = DtorsAndObjects[e - i - 1].first; - llvm::CallInst *CI = Builder.CreateCall(Callee, - DtorsAndObjects[e - i - 1].second); - // Make sure the call and the callee agree on calling convention. - if (llvm::Function *F = dyn_cast<llvm::Function>(Callee)) - CI->setCallingConv(F->getCallingConv()); + { + ArtificialLocation AL(*this, Builder); + StartFunction(GlobalDecl(), getContext().VoidTy, Fn, + getTypes().arrangeNullaryFunction(), FunctionArgList()); + // Emit an artificial location for this function. + AL.Emit(); + + // Emit the dtors, in reverse order from construction. + for (unsigned i = 0, e = DtorsAndObjects.size(); i != e; ++i) { + llvm::Value *Callee = DtorsAndObjects[e - i - 1].first; + llvm::CallInst *CI = Builder.CreateCall(Callee, + DtorsAndObjects[e - i - 1].second); + // Make sure the call and the callee agree on calling convention. 
+ if (llvm::Function *F = dyn_cast<llvm::Function>(Callee)) + CI->setCallingConv(F->getCallingConv()); + } } FinishFunction(); @@ -501,18 +554,17 @@ llvm::Function *CodeGenFunction::generateDestroyHelper( llvm::Constant *addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray, const VarDecl *VD) { FunctionArgList args; - ImplicitParamDecl dst(0, SourceLocation(), 0, getContext().VoidPtrTy); + ImplicitParamDecl dst(getContext(), nullptr, SourceLocation(), nullptr, + getContext().VoidPtrTy); args.push_back(&dst); - - const CGFunctionInfo &FI = - CGM.getTypes().arrangeFunctionDeclaration(getContext().VoidTy, args, - FunctionType::ExtInfo(), - /*variadic*/ false); + + const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( + getContext().VoidTy, args, FunctionType::ExtInfo(), /*variadic=*/false); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); llvm::Function *fn = CreateGlobalInitOrDestructFunction(CGM, FTy, "__cxx_global_array_dtor"); - StartFunction(VD, getContext().VoidTy, fn, FI, args, SourceLocation()); + StartFunction(VD, getContext().VoidTy, fn, FI, args); emitDestroy(addr, type, destroyer, useEHCleanupForArray); diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp index 39a992aab17d..1bbda5cbf09c 100644 --- a/lib/CodeGen/CGException.cpp +++ b/lib/CodeGen/CGException.cpp @@ -17,8 +17,8 @@ #include "TargetInfo.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtObjC.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/Support/CallSite.h" using namespace clang; using namespace CodeGen; @@ -97,24 +97,6 @@ static llvm::Constant *getUnexpectedFn(CodeGenModule &CGM) { return CGM.CreateRuntimeFunction(FTy, "__cxa_call_unexpected"); } -llvm::Constant *CodeGenFunction::getUnwindResumeFn() { - llvm::FunctionType *FTy = - llvm::FunctionType::get(VoidTy, Int8PtrTy, /*IsVarArgs=*/false); - - if (CGM.getLangOpts().SjLjExceptions) - return CGM.CreateRuntimeFunction(FTy, "_Unwind_SjLj_Resume"); - return CGM.CreateRuntimeFunction(FTy, "_Unwind_Resume"); -} - -llvm::Constant *CodeGenFunction::getUnwindResumeOrRethrowFn() { - llvm::FunctionType *FTy = - llvm::FunctionType::get(VoidTy, Int8PtrTy, /*IsVarArgs=*/false); - - if (CGM.getLangOpts().SjLjExceptions) - return CGM.CreateRuntimeFunction(FTy, "_Unwind_SjLj_Resume_or_Rethrow"); - return CGM.CreateRuntimeFunction(FTy, "_Unwind_Resume_or_Rethrow"); -} - static llvm::Constant *getTerminateFn(CodeGenModule &CGM) { // void __terminate(); @@ -164,18 +146,21 @@ namespace { }; } -const EHPersonality EHPersonality::GNU_C = { "__gcc_personality_v0", 0 }; -const EHPersonality EHPersonality::GNU_C_SJLJ = { "__gcc_personality_sj0", 0 }; -const EHPersonality EHPersonality::NeXT_ObjC = { "__objc_personality_v0", 0 }; -const EHPersonality EHPersonality::GNU_CPlusPlus = { "__gxx_personality_v0", 0}; +const EHPersonality EHPersonality::GNU_C = { "__gcc_personality_v0", nullptr }; +const EHPersonality +EHPersonality::GNU_C_SJLJ = { "__gcc_personality_sj0", nullptr }; +const EHPersonality +EHPersonality::NeXT_ObjC = { "__objc_personality_v0", nullptr }; +const EHPersonality +EHPersonality::GNU_CPlusPlus = { "__gxx_personality_v0", nullptr }; const EHPersonality -EHPersonality::GNU_CPlusPlus_SJLJ = { "__gxx_personality_sj0", 0 }; +EHPersonality::GNU_CPlusPlus_SJLJ = { "__gxx_personality_sj0", nullptr }; const EHPersonality EHPersonality::GNU_ObjC = {"__gnu_objc_personality_v0", "objc_exception_throw"}; const EHPersonality -EHPersonality::GNU_ObjCXX = { 
"__gnustep_objcxx_personality_v0", 0 }; +EHPersonality::GNU_ObjCXX = { "__gnustep_objcxx_personality_v0", nullptr }; const EHPersonality -EHPersonality::GNUstep_ObjC = { "__gnustep_objc_personality_v0", 0 }; +EHPersonality::GNUstep_ObjC = { "__gnustep_objc_personality_v0", nullptr }; static const EHPersonality &getCPersonality(const LangOptions &L) { if (L.SjLjExceptions) @@ -263,12 +248,9 @@ static llvm::Constant *getOpaquePersonalityFn(CodeGenModule &CGM, /// Check whether a personality function could reasonably be swapped /// for a C++ personality function. static bool PersonalityHasOnlyCXXUses(llvm::Constant *Fn) { - for (llvm::Constant::use_iterator - I = Fn->use_begin(), E = Fn->use_end(); I != E; ++I) { - llvm::User *User = *I; - + for (llvm::User *U : Fn->users()) { // Conditionally white-list bitcasts. - if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(User)) { + if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(U)) { if (CE->getOpcode() != llvm::Instruction::BitCast) return false; if (!PersonalityHasOnlyCXXUses(CE)) return false; @@ -276,7 +258,7 @@ static bool PersonalityHasOnlyCXXUses(llvm::Constant *Fn) { } // Otherwise, it has to be a landingpad instruction. - llvm::LandingPadInst *LPI = dyn_cast<llvm::LandingPadInst>(User); + llvm::LandingPadInst *LPI = dyn_cast<llvm::LandingPadInst>(U); if (!LPI) return false; for (unsigned I = 0, E = LPI->getNumClauses(); I != E; ++I) { @@ -363,7 +345,7 @@ namespace { struct FreeException : EHScopeStack::Cleanup { llvm::Value *exn; FreeException(llvm::Value *exn) : exn(exn) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { CGF.EmitNounwindRuntimeCall(getFreeExceptionFn(CGF.CGM), exn); } }; @@ -421,6 +403,11 @@ llvm::Value *CodeGenFunction::getSelectorFromSlot() { void CodeGenFunction::EmitCXXThrowExpr(const CXXThrowExpr *E, bool KeepInsertionPoint) { + if (CGM.getTarget().getTriple().isWindowsMSVCEnvironment()) { + ErrorUnsupported(E, "throw expression"); + return; + } + if (!E->getSubExpr()) { EmitNoreturnRuntimeCallOrInvoke(getReThrowFn(CGM), ArrayRef<llvm::Value*>()); @@ -465,7 +452,7 @@ void CodeGenFunction::EmitCXXThrowExpr(const CXXThrowExpr *E, // The address of the destructor. If the exception type has a // trivial destructor (or isn't a record), we just pass null. - llvm::Constant *Dtor = 0; + llvm::Constant *Dtor = nullptr; if (const RecordType *RecordTy = ThrowType->getAs<RecordType>()) { CXXRecordDecl *Record = cast<CXXRecordDecl>(RecordTy->getDecl()); if (!Record->hasTrivialDestructor()) { @@ -490,10 +477,16 @@ void CodeGenFunction::EmitStartEHSpec(const Decl *D) { return; const FunctionDecl* FD = dyn_cast_or_null<FunctionDecl>(D); - if (FD == 0) + if (!FD) { + // Check if CapturedDecl is nothrow and create terminate scope for it. + if (const CapturedDecl* CD = dyn_cast_or_null<CapturedDecl>(D)) { + if (CD->isNothrow()) + EHStack.pushTerminate(); + } return; + } const FunctionProtoType *Proto = FD->getType()->getAs<FunctionProtoType>(); - if (Proto == 0) + if (!Proto) return; ExceptionSpecificationType EST = Proto->getExceptionSpecType(); @@ -558,10 +551,16 @@ void CodeGenFunction::EmitEndEHSpec(const Decl *D) { return; const FunctionDecl* FD = dyn_cast_or_null<FunctionDecl>(D); - if (FD == 0) + if (!FD) { + // Check if CapturedDecl is nothrow and pop terminate scope for it. 
+ if (const CapturedDecl* CD = dyn_cast_or_null<CapturedDecl>(D)) { + if (CD->isNothrow()) + EHStack.popTerminate(); + } return; + } const FunctionProtoType *Proto = FD->getType()->getAs<FunctionProtoType>(); - if (Proto == 0) + if (!Proto) return; ExceptionSpecificationType EST = Proto->getExceptionSpecType(); @@ -577,6 +576,11 @@ void CodeGenFunction::EmitEndEHSpec(const Decl *D) { } void CodeGenFunction::EmitCXXTryStmt(const CXXTryStmt &S) { + if (CGM.getTarget().getTriple().isWindowsMSVCEnvironment()) { + ErrorUnsupported(&S, "try statement"); + return; + } + EnterCXXTryStmt(S); EmitStmt(S.getTryBlock()); ExitCXXTryStmt(S); @@ -600,7 +604,7 @@ void CodeGenFunction::EnterCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { QualType CaughtType = C->getCaughtType(); CaughtType = CaughtType.getNonReferenceType().getUnqualifiedType(); - llvm::Value *TypeInfo = 0; + llvm::Constant *TypeInfo = nullptr; if (CaughtType->isObjCObjectPointerType()) TypeInfo = CGM.getObjCRuntime().GetEHType(CaughtType); else @@ -678,7 +682,7 @@ llvm::BasicBlock *CodeGenFunction::getInvokeDestImpl() { assert(!EHStack.empty()); if (!CGM.getLangOpts().Exceptions) - return 0; + return nullptr; // Check the innermost scope for a cached landing pad. If this is // a non-EH cleanup, we'll check enclosing scopes in EmitLandingPad. @@ -699,56 +703,6 @@ llvm::BasicBlock *CodeGenFunction::getInvokeDestImpl() { return LP; } -// This code contains a hack to work around a design flaw in -// LLVM's EH IR which breaks semantics after inlining. This same -// hack is implemented in llvm-gcc. -// -// The LLVM EH abstraction is basically a thin veneer over the -// traditional GCC zero-cost design: for each range of instructions -// in the function, there is (at most) one "landing pad" with an -// associated chain of EH actions. A language-specific personality -// function interprets this chain of actions and (1) decides whether -// or not to resume execution at the landing pad and (2) if so, -// provides an integer indicating why it's stopping. In LLVM IR, -// the association of a landing pad with a range of instructions is -// achieved via an invoke instruction, the chain of actions becomes -// the arguments to the @llvm.eh.selector call, and the selector -// call returns the integer indicator. Other than the required -// presence of two intrinsic function calls in the landing pad, -// the IR exactly describes the layout of the output code. -// -// A principal advantage of this design is that it is completely -// language-agnostic; in theory, the LLVM optimizers can treat -// landing pads neutrally, and targets need only know how to lower -// the intrinsics to have a functioning exceptions system (assuming -// that platform exceptions follow something approximately like the -// GCC design). Unfortunately, landing pads cannot be combined in a -// language-agnostic way: given selectors A and B, there is no way -// to make a single landing pad which faithfully represents the -// semantics of propagating an exception first through A, then -// through B, without knowing how the personality will interpret the -// (lowered form of the) selectors. This means that inlining has no -// choice but to crudely chain invokes (i.e., to ignore invokes in -// the inlined function, but to turn all unwindable calls into -// invokes), which is only semantically valid if every unwind stops -// at every landing pad. -// -// Therefore, the invoke-inline hack is to guarantee that every -// landing pad has a catch-all. 
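EmitStartEHSpec and EmitEndEHSpec above now bracket nothrow CapturedDecls with a terminate scope, mirroring what is already done for functions with a non-throwing exception specification. A minimal sketch of that user-visible analogue, assuming an ordinary noexcept function rather than a captured region (names are illustrative):

    // Illustrative only: an exception escaping a nothrow boundary must reach
    // std::terminate(), which is what the pushed terminate scope guarantees
    // for the landing pad generated around the call below.
    void may_throw();              // assumed external function that can throw

    void boundary() noexcept {     // EmitStartEHSpec pushes a terminate scope
      may_throw();                 // if this throws, the landing pad terminates
    }                              // EmitEndEHSpec pops the terminate scope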
-enum CleanupHackLevel_t { - /// A level of hack that requires that all landing pads have - /// catch-alls. - CHL_MandatoryCatchall, - - /// A level of hack that requires that all landing pads handle - /// cleanups. - CHL_MandatoryCleanup, - - /// No hacks at all; ideal IR generation. - CHL_Ideal -}; -const CleanupHackLevel_t CleanupHackLevel = CHL_MandatoryCleanup; - llvm::BasicBlock *CodeGenFunction::EmitLandingPad() { assert(EHStack.requiresLandingPad()); @@ -766,11 +720,9 @@ llvm::BasicBlock *CodeGenFunction::EmitLandingPad() { // Save the current IR generation state. CGBuilderTy::InsertPoint savedIP = Builder.saveAndClearIP(); - SourceLocation SavedLocation; - if (CGDebugInfo *DI = getDebugInfo()) { - SavedLocation = DI->getLocation(); + SaveAndRestoreLocation AutoRestoreLocation(*this, Builder); + if (CGDebugInfo *DI = getDebugInfo()) DI->EmitLocation(Builder, CurEHLocation); - } const EHPersonality &personality = EHPersonality::get(getLangOpts()); @@ -877,11 +829,8 @@ llvm::BasicBlock *CodeGenFunction::EmitLandingPad() { LPadInst->setCleanup(true); // Otherwise, signal that we at least have cleanups. - } else if (CleanupHackLevel == CHL_MandatoryCatchall || hasCleanup) { - if (CleanupHackLevel == CHL_MandatoryCatchall) - LPadInst->addClause(getCatchAllValue(*this)); - else - LPadInst->setCleanup(true); + } else if (hasCleanup) { + LPadInst->setCleanup(true); } assert((LPadInst->getNumClauses() > 0 || LPadInst->isCleanup()) && @@ -892,8 +841,6 @@ llvm::BasicBlock *CodeGenFunction::EmitLandingPad() { // Restore the old IR generation state. Builder.restoreIP(savedIP); - if (CGDebugInfo *DI = getDebugInfo()) - DI->EmitLocation(Builder, SavedLocation); return lpad; } @@ -915,7 +862,7 @@ namespace { CallEndCatch(bool MightThrow) : MightThrow(MightThrow) {} bool MightThrow; - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { if (!MightThrow) { CGF.EmitNounwindRuntimeCall(getEndCatchFn(CGF.CGM)); return; @@ -1244,6 +1191,7 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { // If the catch was not required, bail out now. if (!CatchScope.hasEHBranches()) { + CatchScope.clearHandlerBlocks(); EHStack.popCatch(); return; } @@ -1294,6 +1242,10 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { // Initialize the catch variable and set up the cleanups. BeginCatch(*this, C); + // Emit the PGO counter increment. + RegionCounter CatchCnt = getPGORegionCounter(C); + CatchCnt.beginRegion(Builder); + // Perform the body of the catch. 
EmitStmt(C->getHandlerBlock()); @@ -1320,7 +1272,9 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { Builder.CreateBr(ContBB); } + RegionCounter ContCnt = getPGORegionCounter(&S); EmitBlock(ContBB); + ContCnt.beginRegion(Builder); } namespace { @@ -1330,7 +1284,7 @@ namespace { CallEndCatchForFinally(llvm::Value *ForEHVar, llvm::Value *EndCatchFn) : ForEHVar(ForEHVar), EndCatchFn(EndCatchFn) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { llvm::BasicBlock *EndCatchBB = CGF.createBasicBlock("finally.endcatch"); llvm::BasicBlock *CleanupContBB = CGF.createBasicBlock("finally.cleanup.cont"); @@ -1357,7 +1311,7 @@ namespace { : Body(Body), ForEHVar(ForEHVar), EndCatchFn(EndCatchFn), RethrowFn(RethrowFn), SavedExnVar(SavedExnVar) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { // Enter a cleanup to call the end-catch function if one was provided. if (EndCatchFn) CGF.EHStack.pushCleanup<CallEndCatchForFinally>(NormalAndEHCleanup, @@ -1422,7 +1376,7 @@ void CodeGenFunction::FinallyInfo::enter(CodeGenFunction &CGF, llvm::Constant *beginCatchFn, llvm::Constant *endCatchFn, llvm::Constant *rethrowFn) { - assert((beginCatchFn != 0) == (endCatchFn != 0) && + assert((beginCatchFn != nullptr) == (endCatchFn != nullptr) && "begin/end catch functions not paired"); assert(rethrowFn && "rethrow function is required"); @@ -1437,7 +1391,7 @@ void CodeGenFunction::FinallyInfo::enter(CodeGenFunction &CGF, llvm::FunctionType *rethrowFnTy = cast<llvm::FunctionType>( cast<llvm::PointerType>(rethrowFn->getType())->getElementType()); - SavedExnVar = 0; + SavedExnVar = nullptr; if (rethrowFnTy->getNumParams()) SavedExnVar = CGF.CreateTempAlloca(CGF.Int8PtrTy, "finally.exn"); @@ -1487,7 +1441,7 @@ void CodeGenFunction::FinallyInfo::exit(CodeGenFunction &CGF) { CGBuilderTy::InsertPoint savedIP = CGF.Builder.saveAndClearIP(); CGF.EmitBlock(catchBB); - llvm::Value *exn = 0; + llvm::Value *exn = nullptr; // If there's a begin-catch function, call it. if (BeginCatchFn) { @@ -1651,54 +1605,34 @@ llvm::BasicBlock *CodeGenFunction::getEHResumeBlock(bool isCleanup) { // This can always be a call because we necessarily didn't find // anything on the EH stack which needs our help. const char *RethrowName = Personality.CatchallRethrowFn; - if (RethrowName != 0 && !isCleanup) { + if (RethrowName != nullptr && !isCleanup) { EmitRuntimeCall(getCatchallRethrowFn(CGM, RethrowName), - getExceptionFromSlot()) + getExceptionFromSlot()) ->setDoesNotReturn(); - } else { - switch (CleanupHackLevel) { - case CHL_MandatoryCatchall: - // In mandatory-catchall mode, we need to use - // _Unwind_Resume_or_Rethrow, or whatever the personality's - // equivalent is. - EmitRuntimeCall(getUnwindResumeOrRethrowFn(), - getExceptionFromSlot()) - ->setDoesNotReturn(); - break; - case CHL_MandatoryCleanup: { - // In mandatory-cleanup mode, we should use 'resume'. - - // Recreate the landingpad's return value for the 'resume' instruction. 
- llvm::Value *Exn = getExceptionFromSlot(); - llvm::Value *Sel = getSelectorFromSlot(); - - llvm::Type *LPadType = llvm::StructType::get(Exn->getType(), - Sel->getType(), NULL); - llvm::Value *LPadVal = llvm::UndefValue::get(LPadType); - LPadVal = Builder.CreateInsertValue(LPadVal, Exn, 0, "lpad.val"); - LPadVal = Builder.CreateInsertValue(LPadVal, Sel, 1, "lpad.val"); - - Builder.CreateResume(LPadVal); - Builder.restoreIP(SavedIP); - return EHResumeBlock; - } - case CHL_Ideal: - // In an idealized mode where we don't have to worry about the - // optimizer combining landing pads, we should just use - // _Unwind_Resume (or the personality's equivalent). - EmitRuntimeCall(getUnwindResumeFn(), getExceptionFromSlot()) - ->setDoesNotReturn(); - break; - } + Builder.CreateUnreachable(); + Builder.restoreIP(SavedIP); + return EHResumeBlock; } - Builder.CreateUnreachable(); + // Recreate the landingpad's return value for the 'resume' instruction. + llvm::Value *Exn = getExceptionFromSlot(); + llvm::Value *Sel = getSelectorFromSlot(); - Builder.restoreIP(SavedIP); + llvm::Type *LPadType = llvm::StructType::get(Exn->getType(), + Sel->getType(), NULL); + llvm::Value *LPadVal = llvm::UndefValue::get(LPadType); + LPadVal = Builder.CreateInsertValue(LPadVal, Exn, 0, "lpad.val"); + LPadVal = Builder.CreateInsertValue(LPadVal, Sel, 1, "lpad.val"); + Builder.CreateResume(LPadVal); + Builder.restoreIP(SavedIP); return EHResumeBlock; } void CodeGenFunction::EmitSEHTryStmt(const SEHTryStmt &S) { CGM.ErrorUnsupported(&S, "SEH __try"); } + +void CodeGenFunction::EmitSEHLeaveStmt(const SEHLeaveStmt &S) { + CGM.ErrorUnsupported(&S, "SEH __leave"); +} diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index cb990b243fba..512b323ba109 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -21,6 +21,7 @@ #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/Attr.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/Hashing.h" #include "llvm/IR/DataLayout.h" @@ -53,13 +54,13 @@ llvm::Value *CodeGenFunction::EmitCastToVoidPtr(llvm::Value *value) { llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, const Twine &Name) { if (!Builder.isNamePreserving()) - return new llvm::AllocaInst(Ty, 0, "", AllocaInsertPt); - return new llvm::AllocaInst(Ty, 0, Name, AllocaInsertPt); + return new llvm::AllocaInst(Ty, nullptr, "", AllocaInsertPt); + return new llvm::AllocaInst(Ty, nullptr, Name, AllocaInsertPt); } void CodeGenFunction::InitTempAlloca(llvm::AllocaInst *Var, llvm::Value *Init) { - llvm::StoreInst *Store = new llvm::StoreInst(Init, Var); + auto *Store = new llvm::StoreInst(Init, Var); llvm::BasicBlock *Block = AllocaInsertPt->getParent(); Block->getInstList().insertAfter(&*AllocaInsertPt, Store); } @@ -85,6 +86,7 @@ llvm::AllocaInst *CodeGenFunction::CreateMemTemp(QualType Ty, /// EvaluateExprAsBool - Perform the usual unary conversions on the specified /// expression and compare the result against zero, returning an Int1Ty value. 
llvm::Value *CodeGenFunction::EvaluateExprAsBool(const Expr *E) { + PGO.setCurrentStmt(E); if (const MemberPointerType *MPT = E->getType()->getAs<MemberPointerType>()) { llvm::Value *MemPtr = EmitScalarExpr(E); return CGM.getCXXABI().EmitMemberPointerIsNotNull(*this, MemPtr, MPT); @@ -240,11 +242,11 @@ pushTemporaryCleanup(CodeGenFunction &CGF, const MaterializeTemporaryExpr *M, } } - CXXDestructorDecl *ReferenceTemporaryDtor = 0; + CXXDestructorDecl *ReferenceTemporaryDtor = nullptr; if (const RecordType *RT = E->getType()->getBaseElementTypeUnsafe()->getAs<RecordType>()) { // Get the destructor for the reference temporary. - CXXRecordDecl *ClassDecl = cast<CXXRecordDecl>(RT->getDecl()); + auto *ClassDecl = cast<CXXRecordDecl>(RT->getDecl()); if (!ClassDecl->hasTrivialDestructor()) ReferenceTemporaryDtor = ClassDecl->getDestructor(); } @@ -322,7 +324,7 @@ LValue CodeGenFunction::EmitMaterializeTemporaryExpr( llvm::Value *Object = createReferenceTemporary(*this, M, E); LValue RefTempDst = MakeAddrLValue(Object, M->getType()); - if (llvm::GlobalVariable *Var = dyn_cast<llvm::GlobalVariable>(Object)) { + if (auto *Var = dyn_cast<llvm::GlobalVariable>(Object)) { // We should not have emitted the initializer for this temporary as a // constant. assert(!Var->hasInitializer()); @@ -342,7 +344,7 @@ LValue CodeGenFunction::EmitMaterializeTemporaryExpr( for (unsigned I = 0, N = CommaLHSs.size(); I != N; ++I) EmitIgnoredExpr(CommaLHSs[I]); - if (const OpaqueValueExpr *opaque = dyn_cast<OpaqueValueExpr>(E)) { + if (const auto *opaque = dyn_cast<OpaqueValueExpr>(E)) { if (opaque->getType()->isRecordType()) { assert(Adjustments.empty()); return EmitOpaqueValueLValue(opaque); @@ -351,7 +353,7 @@ LValue CodeGenFunction::EmitMaterializeTemporaryExpr( // Create and initialize the reference temporary. llvm::Value *Object = createReferenceTemporary(*this, M, E); - if (llvm::GlobalVariable *Var = dyn_cast<llvm::GlobalVariable>(Object)) { + if (auto *Var = dyn_cast<llvm::GlobalVariable>(Object)) { // If the temporary is a global and has a constant initializer, we may // have already initialized it. if (!Var->hasInitializer()) { @@ -389,7 +391,7 @@ LValue CodeGenFunction::EmitMaterializeTemporaryExpr( case SubobjectAdjustment::MemberPointerAdjustment: { llvm::Value *Ptr = EmitScalarExpr(Adjustment.Ptr.RHS); Object = CGM.getCXXABI().EmitMemberDataPointerAddress( - *this, Object, Ptr, Adjustment.Ptr.MPT); + *this, E, Object, Ptr, Adjustment.Ptr.MPT); break; } } @@ -405,7 +407,7 @@ CodeGenFunction::EmitReferenceBindingToExpr(const Expr *E) { assert(LV.isSimple()); llvm::Value *Value = LV.getAddress(); - if (SanitizePerformTypeCheck && !E->getType()->isFunctionType()) { + if (sanitizePerformTypeCheck() && !E->getType()->isFunctionType()) { // C++11 [dcl.ref]p5 (as amended by core issue 453): // If a glvalue to which a reference is directly bound designates neither // an existing object or function of an appropriate type nor a region of @@ -439,10 +441,15 @@ static llvm::Value *emitHash16Bytes(CGBuilderTy &Builder, llvm::Value *Low, return Builder.CreateMul(B1, KMul); } +bool CodeGenFunction::sanitizePerformTypeCheck() const { + return SanOpts->Null | SanOpts->Alignment | SanOpts->ObjectSize | + SanOpts->Vptr; +} + void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *Address, QualType Ty, CharUnits Alignment) { - if (!SanitizePerformTypeCheck) + if (!sanitizePerformTypeCheck()) return; // Don't check pointers outside the default address space. 
The null check @@ -451,10 +458,12 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, if (Address->getType()->getPointerAddressSpace()) return; - llvm::Value *Cond = 0; - llvm::BasicBlock *Done = 0; + SanitizerScope SanScope(this); - if (SanOpts->Null) { + llvm::Value *Cond = nullptr; + llvm::BasicBlock *Done = nullptr; + + if (SanOpts->Null || TCK == TCK_DowncastPointer) { // The glvalue must not be an empty glvalue. Cond = Builder.CreateICmpNE( Address, llvm::Constant::getNullValue(Address->getType())); @@ -466,7 +475,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::BasicBlock *Rest = createBasicBlock("not.null"); Builder.CreateCondBr(Cond, Rest, Done); EmitBlock(Rest); - Cond = 0; + Cond = nullptr; } } @@ -537,44 +546,48 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::raw_svector_ostream Out(MangledName); CGM.getCXXABI().getMangleContext().mangleCXXRTTI(Ty.getUnqualifiedType(), Out); - llvm::hash_code TypeHash = hash_value(Out.str()); - - // Load the vptr, and compute hash_16_bytes(TypeHash, vptr). - llvm::Value *Low = llvm::ConstantInt::get(Int64Ty, TypeHash); - llvm::Type *VPtrTy = llvm::PointerType::get(IntPtrTy, 0); - llvm::Value *VPtrAddr = Builder.CreateBitCast(Address, VPtrTy); - llvm::Value *VPtrVal = Builder.CreateLoad(VPtrAddr); - llvm::Value *High = Builder.CreateZExt(VPtrVal, Int64Ty); - - llvm::Value *Hash = emitHash16Bytes(Builder, Low, High); - Hash = Builder.CreateTrunc(Hash, IntPtrTy); - - // Look the hash up in our cache. - const int CacheSize = 128; - llvm::Type *HashTable = llvm::ArrayType::get(IntPtrTy, CacheSize); - llvm::Value *Cache = CGM.CreateRuntimeVariable(HashTable, - "__ubsan_vptr_type_cache"); - llvm::Value *Slot = Builder.CreateAnd(Hash, - llvm::ConstantInt::get(IntPtrTy, - CacheSize-1)); - llvm::Value *Indices[] = { Builder.getInt32(0), Slot }; - llvm::Value *CacheVal = - Builder.CreateLoad(Builder.CreateInBoundsGEP(Cache, Indices)); - - // If the hash isn't in the cache, call a runtime handler to perform the - // hard work of checking whether the vptr is for an object of the right - // type. This will either fill in the cache and return, or produce a - // diagnostic. - llvm::Constant *StaticData[] = { - EmitCheckSourceLocation(Loc), - EmitCheckTypeDescriptor(Ty), - CGM.GetAddrOfRTTIDescriptor(Ty.getUnqualifiedType()), - llvm::ConstantInt::get(Int8Ty, TCK) - }; - llvm::Value *DynamicData[] = { Address, Hash }; - EmitCheck(Builder.CreateICmpEQ(CacheVal, Hash), - "dynamic_type_cache_miss", StaticData, DynamicData, - CRK_AlwaysRecoverable); + + // Blacklist based on the mangled type. + if (!CGM.getSanitizerBlacklist().isBlacklistedType(Out.str())) { + llvm::hash_code TypeHash = hash_value(Out.str()); + + // Load the vptr, and compute hash_16_bytes(TypeHash, vptr). + llvm::Value *Low = llvm::ConstantInt::get(Int64Ty, TypeHash); + llvm::Type *VPtrTy = llvm::PointerType::get(IntPtrTy, 0); + llvm::Value *VPtrAddr = Builder.CreateBitCast(Address, VPtrTy); + llvm::Value *VPtrVal = Builder.CreateLoad(VPtrAddr); + llvm::Value *High = Builder.CreateZExt(VPtrVal, Int64Ty); + + llvm::Value *Hash = emitHash16Bytes(Builder, Low, High); + Hash = Builder.CreateTrunc(Hash, IntPtrTy); + + // Look the hash up in our cache. 
+ const int CacheSize = 128; + llvm::Type *HashTable = llvm::ArrayType::get(IntPtrTy, CacheSize); + llvm::Value *Cache = CGM.CreateRuntimeVariable(HashTable, + "__ubsan_vptr_type_cache"); + llvm::Value *Slot = Builder.CreateAnd(Hash, + llvm::ConstantInt::get(IntPtrTy, + CacheSize-1)); + llvm::Value *Indices[] = { Builder.getInt32(0), Slot }; + llvm::Value *CacheVal = + Builder.CreateLoad(Builder.CreateInBoundsGEP(Cache, Indices)); + + // If the hash isn't in the cache, call a runtime handler to perform the + // hard work of checking whether the vptr is for an object of the right + // type. This will either fill in the cache and return, or produce a + // diagnostic. + llvm::Constant *StaticData[] = { + EmitCheckSourceLocation(Loc), + EmitCheckTypeDescriptor(Ty), + CGM.GetAddrOfRTTIDescriptor(Ty.getUnqualifiedType()), + llvm::ConstantInt::get(Int8Ty, TCK) + }; + llvm::Value *DynamicData[] = { Address, Hash }; + EmitCheck(Builder.CreateICmpEQ(CacheVal, Hash), + "dynamic_type_cache_miss", StaticData, DynamicData, + CRK_AlwaysRecoverable); + } } if (Done) { @@ -589,7 +602,7 @@ static bool isFlexibleArrayMemberExpr(const Expr *E) { // For compatibility with existing code, we treat arrays of length 0 or // 1 as flexible array members. const ArrayType *AT = E->getType()->castAsArrayTypeUnsafe(); - if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT)) { + if (const auto *CAT = dyn_cast<ConstantArrayType>(AT)) { if (CAT->getSize().ugt(1)) return false; } else if (!isa<IncompleteArrayType>(AT)) @@ -598,10 +611,10 @@ static bool isFlexibleArrayMemberExpr(const Expr *E) { E = E->IgnoreParens(); // A flexible array member must be the last member in the class. - if (const MemberExpr *ME = dyn_cast<MemberExpr>(E)) { + if (const auto *ME = dyn_cast<MemberExpr>(E)) { // FIXME: If the base type of the member expr is not FD->getParent(), // this should not be treated as a flexible array member access. 
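A small sketch of the case isFlexibleArrayMemberExpr treats specially under -fsanitize=array-bounds, with illustrative field names:

    // Illustrative only: a trailing array of length 0 or 1 is treated as a
    // flexible array member, so indexing it gets no bounds check, while the
    // fixed-size Header array earlier in the struct still does.
    struct Packet {
      unsigned Header[4];
      unsigned Len;
      char Data[1];                 // flexible-array-member idiom
    };

    char readByte(Packet *P, unsigned I) {
      return P->Data[I];            // no known extent: check is skipped
    }

    unsigned readWord(Packet *P, unsigned I) {
      return P->Header[I];          // checked against the constant bound 4
    }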
- if (const FieldDecl *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) { + if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) { RecordDecl::field_iterator FI( DeclContext::decl_iterator(const_cast<FieldDecl *>(FD))); return ++FI == FD->getParent()->field_end(); @@ -623,19 +636,19 @@ static llvm::Value *getArrayIndexingBound( Base = Base->IgnoreParens(); - if (const CastExpr *CE = dyn_cast<CastExpr>(Base)) { + if (const auto *CE = dyn_cast<CastExpr>(Base)) { if (CE->getCastKind() == CK_ArrayToPointerDecay && !isFlexibleArrayMemberExpr(CE->getSubExpr())) { IndexedType = CE->getSubExpr()->getType(); const ArrayType *AT = IndexedType->castAsArrayTypeUnsafe(); - if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT)) + if (const auto *CAT = dyn_cast<ConstantArrayType>(AT)) return CGF.Builder.getInt(CAT->getSize()); - else if (const VariableArrayType *VAT = dyn_cast<VariableArrayType>(AT)) + else if (const auto *VAT = dyn_cast<VariableArrayType>(AT)) return CGF.getVLASize(VAT).first; } } - return 0; + return nullptr; } void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base, @@ -643,6 +656,7 @@ void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base, bool Accessed) { assert(SanOpts->ArrayBounds && "should not be called unless adding bounds checks"); + SanitizerScope SanScope(this); QualType IndexedType; llvm::Value *Bound = getArrayIndexingBound(*this, Base, IndexedType); @@ -704,7 +718,7 @@ EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV, RValue CodeGenFunction::GetUndefRValue(QualType Ty) { if (Ty->isVoidType()) - return RValue::get(0); + return RValue::get(nullptr); switch (getEvaluationKind(Ty)) { case TEK_Complex: { @@ -819,7 +833,7 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { return EmitLambdaLValue(cast<LambdaExpr>(E)); case Expr::ExprWithCleanupsClass: { - const ExprWithCleanups *cleanups = cast<ExprWithCleanups>(E); + const auto *cleanups = cast<ExprWithCleanups>(E); enterFullExpression(cleanups); RunCleanupsScope Scope(*this); return EmitLValue(cleanups->getSubExpr()); @@ -887,8 +901,8 @@ static bool isConstantEmittableObjectType(QualType type) { // Otherwise, all object types satisfy this except C++ classes with // mutable subobjects or non-trivial copy/destroy behavior. 
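isConstantEmittableObjectType and checkVarTypeForConstantEmission above drive tryEmitAsConstant; a minimal sketch of a qualifying case, with hypothetical names:

    // Illustrative only: int is a constant-emittable object type, so a use of
    // N whose initializer is a known constant can be folded to the value 10
    // rather than loading N's storage; a class with mutable members or a
    // non-trivial destructor would not qualify.
    const int N = 10;
    int clampToN(int v) { return v > N ? N : v; }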
- if (const RecordType *RT = dyn_cast<RecordType>(type)) - if (const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl())) + if (const auto *RT = dyn_cast<RecordType>(type)) + if (const auto *RD = dyn_cast<CXXRecordDecl>(RT->getDecl())) if (RD->hasMutableFields() || !RD->isTrivial()) return false; @@ -910,7 +924,7 @@ enum ConstantEmissionKind { }; static ConstantEmissionKind checkVarTypeForConstantEmission(QualType type) { type = type.getCanonicalType(); - if (const ReferenceType *ref = dyn_cast<ReferenceType>(type)) { + if (const auto *ref = dyn_cast<ReferenceType>(type)) { if (isConstantEmittableObjectType(ref->getPointeeType())) return CEK_AsValueOrReference; return CEK_AsReferenceOnly; @@ -933,7 +947,7 @@ CodeGenFunction::tryEmitAsConstant(DeclRefExpr *refExpr) { ConstantEmissionKind CEK; if (isa<ParmVarDecl>(value)) { CEK = CEK_None; - } else if (VarDecl *var = dyn_cast<VarDecl>(value)) { + } else if (auto *var = dyn_cast<VarDecl>(value)) { CEK = checkVarTypeForConstantEmission(var->getType()); } else if (isa<EnumConstantDecl>(value)) { CEK = CEK_AsValueOnly; @@ -1046,7 +1060,7 @@ llvm::MDNode *CodeGenFunction::getRangeForLoadFromType(QualType Ty) { llvm::APInt Min, End; if (!getRangeForType(*this, Ty, Min, End, CGM.getCodeGenOpts().StrictEnums)) - return 0; + return nullptr; llvm::MDBuilder MDHelper(getLLVMContext()); return MDHelper.createRange(Min, End); @@ -1064,7 +1078,7 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(llvm::Value *Addr, bool Volatile, const llvm::Type *EltTy = cast<llvm::PointerType>(Addr->getType())->getElementType(); - const llvm::VectorType *VTy = cast<llvm::VectorType>(EltTy); + const auto *VTy = cast<llvm::VectorType>(EltTy); // Handle vectors of size 3, like size 4 for better performance. if (VTy->getNumElements() == 3) { @@ -1118,6 +1132,7 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(llvm::Value *Addr, bool Volatile, if ((SanOpts->Bool && hasBooleanRepresentation(Ty)) || (SanOpts->Enum && Ty->getAs<EnumType>())) { + SanitizerScope SanScope(this); llvm::APInt Min, End; if (getRangeForType(*this, Ty, Min, End, true)) { --End; @@ -1180,7 +1195,7 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, llvm::Value *Addr, // Handle vectors differently to get better performance. if (Ty->isVectorType()) { llvm::Type *SrcTy = Value->getType(); - llvm::VectorType *VecTy = cast<llvm::VectorType>(SrcTy); + auto *VecTy = cast<llvm::VectorType>(SrcTy); // Handle vec3 special. 
if (VecTy->getNumElements() == 3) { llvm::LLVMContext &VMContext = getLLVMContext(); @@ -1201,7 +1216,7 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, llvm::Value *Addr, MaskV, "extractVec"); SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4); } - llvm::PointerType *DstPtr = cast<llvm::PointerType>(Addr->getType()); + auto *DstPtr = cast<llvm::PointerType>(Addr->getType()); if (DstPtr->getElementType() != SrcTy) { llvm::Type *MemTy = llvm::PointerType::get(SrcTy, DstPtr->getAddressSpace()); @@ -1275,6 +1290,10 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) { if (LV.isExtVectorElt()) return EmitLoadOfExtVectorElementLValue(LV); + // Global Register variables always invoke intrinsics + if (LV.isGlobalReg()) + return EmitLoadOfGlobalRegLValue(LV); + assert(LV.isBitField() && "Unknown LValue type!"); return EmitLoadOfBitfieldLValue(LV); } @@ -1325,7 +1344,7 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) { const VectorType *ExprVT = LV.getType()->getAs<VectorType>(); if (!ExprVT) { unsigned InIdx = getAccessedFieldNo(0, Elts); - llvm::Value *Elt = llvm::ConstantInt::get(Int32Ty, InIdx); + llvm::Value *Elt = llvm::ConstantInt::get(SizeTy, InIdx); return RValue::get(Builder.CreateExtractElement(Vec, Elt)); } @@ -1342,6 +1361,26 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) { return RValue::get(Vec); } +/// @brief Load of global gamed gegisters are always calls to intrinsics. +RValue CodeGenFunction::EmitLoadOfGlobalRegLValue(LValue LV) { + assert((LV.getType()->isIntegerType() || LV.getType()->isPointerType()) && + "Bad type for register variable"); + llvm::MDNode *RegName = dyn_cast<llvm::MDNode>(LV.getGlobalReg()); + assert(RegName && "Register LValue is not metadata"); + + // We accept integer and pointer types only + llvm::Type *OrigTy = CGM.getTypes().ConvertType(LV.getType()); + llvm::Type *Ty = OrigTy; + if (OrigTy->isPointerTy()) + Ty = CGM.getTypes().getDataLayout().getIntPtrType(OrigTy); + llvm::Type *Types[] = { Ty }; + + llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); + llvm::Value *Call = Builder.CreateCall(F, RegName); + if (OrigTy->isPointerTy()) + Call = Builder.CreateIntToPtr(Call, OrigTy); + return RValue::get(Call); +} /// EmitStoreThroughLValue - Store the specified rvalue into the specified @@ -1369,6 +1408,9 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst, if (Dst.isExtVectorElt()) return EmitStoreThroughExtVectorComponentLValue(Src, Dst); + if (Dst.isGlobalReg()) + return EmitStoreThroughGlobalRegLValue(Src, Dst); + assert(Dst.isBitField() && "Unknown LValue type"); return EmitStoreThroughBitfieldLValue(Src, Dst); } @@ -1571,7 +1613,7 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src, } else { // If the Src is a scalar (not a vector) it must be updating one element. unsigned InIdx = getAccessedFieldNo(0, Elts); - llvm::Value *Elt = llvm::ConstantInt::get(Int32Ty, InIdx); + llvm::Value *Elt = llvm::ConstantInt::get(SizeTy, InIdx); Vec = Builder.CreateInsertElement(Vec, SrcVal, Elt); } @@ -1580,7 +1622,28 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src, Store->setAlignment(Dst.getAlignment().getQuantity()); } -// setObjCGCLValueClass - sets class of he lvalue for the purpose of +/// @brief Store of global named registers are always calls to intrinsics. 
+void CodeGenFunction::EmitStoreThroughGlobalRegLValue(RValue Src, LValue Dst) { + assert((Dst.getType()->isIntegerType() || Dst.getType()->isPointerType()) && + "Bad type for register variable"); + llvm::MDNode *RegName = dyn_cast<llvm::MDNode>(Dst.getGlobalReg()); + assert(RegName && "Register LValue is not metadata"); + + // We accept integer and pointer types only + llvm::Type *OrigTy = CGM.getTypes().ConvertType(Dst.getType()); + llvm::Type *Ty = OrigTy; + if (OrigTy->isPointerTy()) + Ty = CGM.getTypes().getDataLayout().getIntPtrType(OrigTy); + llvm::Type *Types[] = { Ty }; + + llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); + llvm::Value *Value = Src.getScalarVal(); + if (OrigTy->isPointerTy()) + Value = Builder.CreatePtrToInt(Value, Ty); + Builder.CreateCall2(F, RegName, Value); +} + +// setObjCGCLValueClass - sets class of the lvalue for the purpose of // generating write-barries API. It is currently a global, ivar, // or neither. static void setObjCGCLValueClass(const ASTContext &Ctx, const Expr *E, @@ -1602,14 +1665,14 @@ static void setObjCGCLValueClass(const ASTContext &Ctx, const Expr *E, } } LV.setObjCIvar(true); - ObjCIvarRefExpr *Exp = cast<ObjCIvarRefExpr>(const_cast<Expr*>(E)); + auto *Exp = cast<ObjCIvarRefExpr>(const_cast<Expr *>(E)); LV.setBaseIvarExp(Exp->getBase()); LV.setObjCArray(E->getType()->isArrayType()); return; } - if (const DeclRefExpr *Exp = dyn_cast<DeclRefExpr>(E)) { - if (const VarDecl *VD = dyn_cast<VarDecl>(Exp->getDecl())) { + if (const auto *Exp = dyn_cast<DeclRefExpr>(E)) { + if (const auto *VD = dyn_cast<VarDecl>(Exp->getDecl())) { if (VD->hasGlobalStorage()) { LV.setGlobalObjCRef(true); LV.setThreadLocalRef(VD->getTLSKind() != VarDecl::TLS_None); @@ -1619,12 +1682,12 @@ static void setObjCGCLValueClass(const ASTContext &Ctx, const Expr *E, return; } - if (const UnaryOperator *Exp = dyn_cast<UnaryOperator>(E)) { + if (const auto *Exp = dyn_cast<UnaryOperator>(E)) { setObjCGCLValueClass(Ctx, Exp->getSubExpr(), LV, IsMemberAccess); return; } - if (const ParenExpr *Exp = dyn_cast<ParenExpr>(E)) { + if (const auto *Exp = dyn_cast<ParenExpr>(E)) { setObjCGCLValueClass(Ctx, Exp->getSubExpr(), LV, IsMemberAccess); if (LV.isObjCIvar()) { // If cast is to a structure pointer, follow gcc's behavior and make it @@ -1638,27 +1701,27 @@ static void setObjCGCLValueClass(const ASTContext &Ctx, const Expr *E, return; } - if (const GenericSelectionExpr *Exp = dyn_cast<GenericSelectionExpr>(E)) { + if (const auto *Exp = dyn_cast<GenericSelectionExpr>(E)) { setObjCGCLValueClass(Ctx, Exp->getResultExpr(), LV); return; } - if (const ImplicitCastExpr *Exp = dyn_cast<ImplicitCastExpr>(E)) { + if (const auto *Exp = dyn_cast<ImplicitCastExpr>(E)) { setObjCGCLValueClass(Ctx, Exp->getSubExpr(), LV, IsMemberAccess); return; } - if (const CStyleCastExpr *Exp = dyn_cast<CStyleCastExpr>(E)) { + if (const auto *Exp = dyn_cast<CStyleCastExpr>(E)) { setObjCGCLValueClass(Ctx, Exp->getSubExpr(), LV, IsMemberAccess); return; } - if (const ObjCBridgedCastExpr *Exp = dyn_cast<ObjCBridgedCastExpr>(E)) { + if (const auto *Exp = dyn_cast<ObjCBridgedCastExpr>(E)) { setObjCGCLValueClass(Ctx, Exp->getSubExpr(), LV, IsMemberAccess); return; } - if (const ArraySubscriptExpr *Exp = dyn_cast<ArraySubscriptExpr>(E)) { + if (const auto *Exp = dyn_cast<ArraySubscriptExpr>(E)) { setObjCGCLValueClass(Ctx, Exp->getBase(), LV); if (LV.isObjCIvar() && !LV.isObjCArray()) // Using array syntax to assigning to what an ivar points to is not @@ -1671,7 +1734,7 @@ static void 
setObjCGCLValueClass(const ASTContext &Ctx, const Expr *E, return; } - if (const MemberExpr *Exp = dyn_cast<MemberExpr>(E)) { + if (const auto *Exp = dyn_cast<MemberExpr>(E)) { setObjCGCLValueClass(Ctx, Exp->getBase(), LV, true); // We don't know if member is an 'ivar', but this flag is looked at // only in the context of LV.isObjCIvar(). @@ -1690,11 +1753,16 @@ EmitBitCastOfLValueToProperType(CodeGenFunction &CGF, static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, const Expr *E, const VarDecl *VD) { + QualType T = E->getType(); + + // If it's thread_local, emit a call to its wrapper function instead. + if (VD->getTLSKind() == VarDecl::TLS_Dynamic) + return CGF.CGM.getCXXABI().EmitThreadLocalVarDeclLValue(CGF, VD, T); + llvm::Value *V = CGF.CGM.GetAddrOfGlobalVar(VD); llvm::Type *RealVarTy = CGF.getTypes().ConvertTypeForMem(VD->getType()); V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy); CharUnits Alignment = CGF.getContext().getDeclAlign(VD); - QualType T = E->getType(); LValue LV; if (VD->getType()->isReferenceType()) { llvm::LoadInst *LI = CGF.Builder.CreateLoad(V); @@ -1702,7 +1770,7 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, V = LI; LV = CGF.MakeNaturalAlignAddrLValue(V, T); } else { - LV = CGF.MakeAddrLValue(V, E->getType(), Alignment); + LV = CGF.MakeAddrLValue(V, T, Alignment); } setObjCGCLValueClass(CGF.getContext(), E, LV); return LV; @@ -1718,7 +1786,7 @@ static LValue EmitFunctionDeclLValue(CodeGenFunction &CGF, // isn't the same as the type of a use. Correct for this with a // bitcast. QualType NoProtoType = - CGF.getContext().getFunctionNoProtoType(Proto->getResultType()); + CGF.getContext().getFunctionNoProtoType(Proto->getReturnType()); NoProtoType = CGF.getContext().getPointerType(NoProtoType); V = CGF.Builder.CreateBitCast(V, CGF.ConvertType(NoProtoType)); } @@ -1734,14 +1802,44 @@ static LValue EmitCapturedFieldLValue(CodeGenFunction &CGF, const FieldDecl *FD, return CGF.EmitLValueForField(LV, FD); } +/// Named Registers are named metadata pointing to the register name +/// which will be read from/written to as an argument to the intrinsic +/// @llvm.read/write_register. +/// So far, only the name is being passed down, but other options such as +/// register type, allocation type or even optimization options could be +/// passed down via the metadata node. +static LValue EmitGlobalNamedRegister(const VarDecl *VD, + CodeGenModule &CGM, + CharUnits Alignment) { + SmallString<64> Name("llvm.named.register."); + AsmLabelAttr *Asm = VD->getAttr<AsmLabelAttr>(); + assert(Asm->getLabel().size() < 64-Name.size() && + "Register name too big"); + Name.append(Asm->getLabel()); + llvm::NamedMDNode *M = + CGM.getModule().getOrInsertNamedMetadata(Name); + if (M->getNumOperands() == 0) { + llvm::MDString *Str = llvm::MDString::get(CGM.getLLVMContext(), + Asm->getLabel()); + llvm::Value *Ops[] = { Str }; + M->addOperand(llvm::MDNode::get(CGM.getLLVMContext(), Ops)); + } + return LValue::MakeGlobalReg(M->getOperand(0), VD->getType(), Alignment); +} + LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { const NamedDecl *ND = E->getDecl(); CharUnits Alignment = getContext().getDeclAlign(ND); QualType T = E->getType(); - // A DeclRefExpr for a reference initialized by a constant expression can - // appear without being odr-used. Directly emit the constant initializer. 
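The named-register machinery above only forwards the register name as metadata; a minimal sketch of the source form that reaches EmitGlobalNamedRegister, assuming a GNU-style global register variable on a target where "sp" names the stack pointer:

    // Illustrative only: a file-scope register variable with an asm label gets
    // no storage; reads and writes become @llvm.read_register and
    // @llvm.write_register calls carrying the metadata string built above.
    register unsigned long StackPtr asm("sp");

    unsigned long currentSP() { return StackPtr; }            // read_register
    void switchSP(unsigned long NewSP) { StackPtr = NewSP; }  // write_register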
- if (const VarDecl *VD = dyn_cast<VarDecl>(ND)) { + if (const auto *VD = dyn_cast<VarDecl>(ND)) { + // Global Named registers access via intrinsics only + if (VD->getStorageClass() == SC_Register && + VD->hasAttr<AsmLabelAttr>() && !VD->isLocalVarDecl()) + return EmitGlobalNamedRegister(VD, CGM, Alignment); + + // A DeclRefExpr for a reference initialized by a constant expression can + // appear without being odr-used. Directly emit the constant initializer. const Expr *Init = VD->getAnyInitializer(VD); if (Init && !isa<ParmVarDecl>(VD) && VD->getType()->isReferenceType() && VD->isUsableInConstantExpressions(getContext()) && @@ -1762,19 +1860,15 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { "Should not use decl without marking it used!"); if (ND->hasAttr<WeakRefAttr>()) { - const ValueDecl *VD = cast<ValueDecl>(ND); + const auto *VD = cast<ValueDecl>(ND); llvm::Constant *Aliasee = CGM.GetWeakRefReference(VD); return MakeAddrLValue(Aliasee, T, Alignment); } - if (const VarDecl *VD = dyn_cast<VarDecl>(ND)) { + if (const auto *VD = dyn_cast<VarDecl>(ND)) { // Check if this is a global variable. - if (VD->hasLinkage() || VD->isStaticDataMember()) { - // If it's thread_local, emit a call to its wrapper function instead. - if (VD->getTLSKind() == VarDecl::TLS_Dynamic) - return CGM.getCXXABI().EmitThreadLocalDeclRefExpr(*this, E); + if (VD->hasLinkage() || VD->isStaticDataMember()) return EmitGlobalVarDeclLValue(*this, E, VD); - } bool isBlockVariable = VD->hasAttr<BlocksAttr>(); @@ -1830,7 +1924,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { return LV; } - if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) + if (const auto *FD = dyn_cast<FunctionDecl>(ND)) return EmitFunctionDeclLValue(*this, E, FD); llvm_unreachable("Unhandled DeclRefExpr"); @@ -1907,30 +2001,6 @@ LValue CodeGenFunction::EmitObjCEncodeExprLValue(const ObjCEncodeExpr *E) { E->getType()); } -static llvm::Constant* -GetAddrOfConstantWideString(StringRef Str, - const char *GlobalName, - ASTContext &Context, - QualType Ty, SourceLocation Loc, - CodeGenModule &CGM) { - - StringLiteral *SL = StringLiteral::Create(Context, - Str, - StringLiteral::Wide, - /*Pascal = */false, - Ty, Loc); - llvm::Constant *C = CGM.GetConstantArrayFromStringLiteral(SL); - llvm::GlobalVariable *GV = - new llvm::GlobalVariable(CGM.getModule(), C->getType(), - !CGM.getLangOpts().WritableStrings, - llvm::GlobalValue::PrivateLinkage, - C, GlobalName); - const unsigned WideAlignment = - Context.getTypeAlignInChars(Ty).getQuantity(); - GV->setAlignment(WideAlignment); - return GV; -} - static void ConvertUTF8ToWideString(unsigned CharByteWidth, StringRef Source, SmallString<32>& Target) { Target.resize(CharByteWidth * (Source.size() + 1)); @@ -1951,37 +2021,31 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) { case PredefinedExpr::Function: case PredefinedExpr::LFunction: case PredefinedExpr::FuncDName: + case PredefinedExpr::FuncSig: case PredefinedExpr::PrettyFunction: { PredefinedExpr::IdentType IdentType = E->getIdentType(); - std::string GlobalVarName; + std::string GVName; + // FIXME: We should use the string literal mangling for the Microsoft C++ + // ABI so that strings get merged. 
switch (IdentType) { default: llvm_unreachable("Invalid type"); - case PredefinedExpr::Func: - GlobalVarName = "__func__."; - break; - case PredefinedExpr::Function: - GlobalVarName = "__FUNCTION__."; - break; - case PredefinedExpr::FuncDName: - GlobalVarName = "__FUNCDNAME__."; - break; - case PredefinedExpr::LFunction: - GlobalVarName = "L__FUNCTION__."; - break; - case PredefinedExpr::PrettyFunction: - GlobalVarName = "__PRETTY_FUNCTION__."; - break; + case PredefinedExpr::Func: GVName = "__func__."; break; + case PredefinedExpr::Function: GVName = "__FUNCTION__."; break; + case PredefinedExpr::FuncDName: GVName = "__FUNCDNAME__."; break; + case PredefinedExpr::FuncSig: GVName = "__FUNCSIG__."; break; + case PredefinedExpr::LFunction: GVName = "L__FUNCTION__."; break; + case PredefinedExpr::PrettyFunction: GVName = "__PRETTY_FUNCTION__."; break; } StringRef FnName = CurFn->getName(); if (FnName.startswith("\01")) FnName = FnName.substr(1); - GlobalVarName += FnName; + GVName += FnName; // If this is outside of a function use the top level decl. const Decl *CurDecl = CurCodeDecl; - if (CurDecl == 0 || isa<VarDecl>(CurDecl)) + if (!CurDecl || isa<VarDecl>(CurDecl)) CurDecl = getContext().getTranslationUnitDecl(); const Type *ElemType = E->getType()->getArrayElementTypeNoTypeQual(); @@ -2005,18 +2069,14 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) { if (ElemType->isWideCharType()) { SmallString<32> RawChars; ConvertUTF8ToWideString( - getContext().getTypeSizeInChars(ElemType).getQuantity(), - FunctionName, RawChars); - C = GetAddrOfConstantWideString(RawChars, - GlobalVarName.c_str(), - getContext(), - E->getType(), - E->getLocation(), - CGM); + getContext().getTypeSizeInChars(ElemType).getQuantity(), FunctionName, + RawChars); + StringLiteral *SL = StringLiteral::Create( + getContext(), RawChars, StringLiteral::Wide, + /*Pascal = */ false, E->getType(), E->getLocation()); + C = CGM.GetAddrOfConstantStringFromLiteral(SL); } else { - C = CGM.GetAddrOfConstantCString(FunctionName, - GlobalVarName.c_str(), - 1); + C = CGM.GetAddrOfConstantCString(FunctionName, GVName.c_str(), 1); } return MakeAddrLValue(C, E->getType()); } @@ -2034,7 +2094,7 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) { /// integer, 1 for a floating point value, and -1 for anything else. llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) { // Only emit each type's descriptor once. - if (llvm::Constant *C = CGM.getTypeDescriptor(T)) + if (llvm::Constant *C = CGM.getTypeDescriptorFromMap(T)) return C; uint16_t TypeKind = -1; @@ -2054,7 +2114,7 @@ llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) { SmallString<32> Buffer; CGM.getDiags().ConvertArgToString(DiagnosticsEngine::ak_qualtype, (intptr_t)T.getAsOpaquePtr(), - 0, 0, 0, 0, 0, 0, Buffer, + StringRef(), StringRef(), None, Buffer, ArrayRef<intptr_t>()); llvm::Constant *Components[] = { @@ -2063,15 +2123,14 @@ llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) { }; llvm::Constant *Descriptor = llvm::ConstantStruct::getAnon(Components); - llvm::GlobalVariable *GV = - new llvm::GlobalVariable(CGM.getModule(), Descriptor->getType(), - /*isConstant=*/true, - llvm::GlobalVariable::PrivateLinkage, - Descriptor); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), Descriptor->getType(), + /*isConstant=*/true, llvm::GlobalVariable::PrivateLinkage, Descriptor); GV->setUnnamedAddr(true); + CGM.disableSanitizerForGlobal(GV); // Remember the descriptor for this type. 
- CGM.setTypeDescriptor(T, GV); + CGM.setTypeDescriptorInMap(T, GV); return GV; } @@ -2112,14 +2171,23 @@ llvm::Value *CodeGenFunction::EmitCheckValue(llvm::Value *V) { /// \endcode /// For an invalid SourceLocation, the Filename pointer is null. llvm::Constant *CodeGenFunction::EmitCheckSourceLocation(SourceLocation Loc) { + llvm::Constant *Filename; + int Line, Column; + PresumedLoc PLoc = getContext().getSourceManager().getPresumedLoc(Loc); + if (PLoc.isValid()) { + auto FilenameGV = CGM.GetAddrOfConstantCString(PLoc.getFilename(), ".src"); + CGM.disableSanitizerForGlobal(FilenameGV); + Filename = FilenameGV; + Line = PLoc.getLine(); + Column = PLoc.getColumn(); + } else { + Filename = llvm::Constant::getNullValue(Int8PtrTy); + Line = Column = 0; + } - llvm::Constant *Data[] = { - PLoc.isValid() ? CGM.GetAddrOfConstantCString(PLoc.getFilename(), ".src") - : llvm::Constant::getNullValue(Int8PtrTy), - Builder.getInt32(PLoc.isValid() ? PLoc.getLine() : 0), - Builder.getInt32(PLoc.isValid() ? PLoc.getColumn() : 0) - }; + llvm::Constant *Data[] = {Filename, Builder.getInt32(Line), + Builder.getInt32(Column)}; return llvm::ConstantStruct::getAnon(Data); } @@ -2129,6 +2197,7 @@ void CodeGenFunction::EmitCheck(llvm::Value *Checked, StringRef CheckName, ArrayRef<llvm::Value *> DynamicArgs, CheckRecoverableKind RecoverKind) { assert(SanOpts != &SanitizerOptions::Disabled); + assert(IsSanitizerScope); if (CGM.getCodeGenOpts().SanitizeUndefinedTrapOnError) { assert (RecoverKind != CRK_AlwaysRecoverable && @@ -2151,10 +2220,11 @@ void CodeGenFunction::EmitCheck(llvm::Value *Checked, StringRef CheckName, EmitBlock(Handler); llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); - llvm::GlobalValue *InfoPtr = + auto *InfoPtr = new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false, llvm::GlobalVariable::PrivateLinkage, Info); InfoPtr->setUnnamedAddr(true); + CGM.disableSanitizerForGlobal(InfoPtr); SmallVector<llvm::Value *, 4> Args; SmallVector<llvm::Type *, 4> ArgTypes; @@ -2171,9 +2241,9 @@ void CodeGenFunction::EmitCheck(llvm::Value *Checked, StringRef CheckName, ArgTypes.push_back(IntPtrTy); } - bool Recover = (RecoverKind == CRK_AlwaysRecoverable) || - ((RecoverKind == CRK_Recoverable) && - CGM.getCodeGenOpts().SanitizeRecover); + bool Recover = RecoverKind == CRK_AlwaysRecoverable || + (RecoverKind == CRK_Recoverable && + CGM.getCodeGenOpts().SanitizeRecover); llvm::FunctionType *FnType = llvm::FunctionType::get(CGM.VoidTy, ArgTypes, false); @@ -2185,15 +2255,14 @@ void CodeGenFunction::EmitCheck(llvm::Value *Checked, StringRef CheckName, B.addAttribute(llvm::Attribute::UWTable); // Checks that have two variants use a suffix to differentiate them - bool NeedsAbortSuffix = (RecoverKind != CRK_Unrecoverable) && - !CGM.getCodeGenOpts().SanitizeRecover; + bool NeedsAbortSuffix = RecoverKind != CRK_Unrecoverable && + !CGM.getCodeGenOpts().SanitizeRecover; std::string FunctionName = ("__ubsan_handle_" + CheckName + (NeedsAbortSuffix? 
"_abort" : "")).str(); - llvm::Value *Fn = - CGM.CreateRuntimeFunction(FnType, FunctionName, - llvm::AttributeSet::get(getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - B)); + llvm::Value *Fn = CGM.CreateRuntimeFunction( + FnType, FunctionName, + llvm::AttributeSet::get(getLLVMContext(), + llvm::AttributeSet::FunctionIndex, B)); llvm::CallInst *HandlerCall = EmitNounwindRuntimeCall(Fn, Args); if (Recover) { Builder.CreateBr(Cont); @@ -2230,14 +2299,14 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked) { /// array to pointer, return the array subexpression. static const Expr *isSimpleArrayDecayOperand(const Expr *E) { // If this isn't just an array->pointer decay, bail out. - const CastExpr *CE = dyn_cast<CastExpr>(E); - if (CE == 0 || CE->getCastKind() != CK_ArrayToPointerDecay) - return 0; + const auto *CE = dyn_cast<CastExpr>(E); + if (!CE || CE->getCastKind() != CK_ArrayToPointerDecay) + return nullptr; // If this is a decay from variable width array, bail out. const Expr *SubExpr = CE->getSubExpr(); if (SubExpr->getType()->isVariableArrayType()) - return 0; + return nullptr; return SubExpr; } @@ -2258,7 +2327,6 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, // Emit the vector as an lvalue to get its address. LValue LHS = EmitLValue(E->getBase()); assert(LHS.isSimple() && "Can only subscript lvalue vectors here!"); - Idx = Builder.CreateIntCast(Idx, Int32Ty, IdxSigned, "vidx"); return LValue::MakeVectorElt(LHS.getAddress(), Idx, E->getBase()->getType(), LHS.getAlignment()); } @@ -2269,7 +2337,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, // We know that the pointer points to a type of the correct size, unless the // size is a VLA or Objective-C interface. - llvm::Value *Address = 0; + llvm::Value *Address = nullptr; CharUnits ArrayAlignment; if (const VariableArrayType *vla = getContext().getAsVariableArrayType(E->getType())) { @@ -2315,7 +2383,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, LValue ArrayLV; // For simple multidimensional array indexing, set the 'accessed' flag for // better bounds-checking of the base expression. - if (const ArraySubscriptExpr *ASE = dyn_cast<ArraySubscriptExpr>(Array)) + if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Array)) ArrayLV = EmitArraySubscriptExpr(ASE, /*Accessed*/ true); else ArrayLV = EmitLValue(Array); @@ -2442,16 +2510,16 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) { BaseLV = EmitCheckedLValue(BaseExpr, TCK_MemberAccess); NamedDecl *ND = E->getMemberDecl(); - if (FieldDecl *Field = dyn_cast<FieldDecl>(ND)) { + if (auto *Field = dyn_cast<FieldDecl>(ND)) { LValue LV = EmitLValueForField(BaseLV, Field); setObjCGCLValueClass(getContext(), E, LV); return LV; } - if (VarDecl *VD = dyn_cast<VarDecl>(ND)) + if (auto *VD = dyn_cast<VarDecl>(ND)) return EmitGlobalVarDeclLValue(*this, E, VD); - if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) + if (const auto *FD = dyn_cast<FunctionDecl>(ND)) return EmitFunctionDeclLValue(*this, E, FD); llvm_unreachable("Unhandled member declaration!"); @@ -2641,6 +2709,19 @@ LValue CodeGenFunction::EmitInitListLValue(const InitListExpr *E) { return EmitLValue(E->getInit(0)); } +/// Emit the operand of a glvalue conditional operator. This is either a glvalue +/// or a (possibly-parenthesized) throw-expression. If this is a throw, no +/// LValue is returned and the current block has been terminated. 
+static Optional<LValue> EmitLValueOrThrowExpression(CodeGenFunction &CGF, + const Expr *Operand) { + if (auto *ThrowExpr = dyn_cast<CXXThrowExpr>(Operand->IgnoreParens())) { + CGF.EmitCXXThrowExpr(ThrowExpr, /*KeepInsertionPoint*/false); + return None; + } + + return CGF.EmitLValue(Operand); +} + LValue CodeGenFunction:: EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) { if (!expr->isGLValue()) { @@ -2651,6 +2732,7 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) { } OpaqueValueMapping binding(*this, expr); + RegionCounter Cnt = getPGORegionCounter(expr); const Expr *condExpr = expr->getCond(); bool CondExprBool; @@ -2658,8 +2740,12 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) { const Expr *live = expr->getTrueExpr(), *dead = expr->getFalseExpr(); if (!CondExprBool) std::swap(live, dead); - if (!ContainsLabel(dead)) + if (!ContainsLabel(dead)) { + // If the true case is live, we need to track its region. + if (CondExprBool) + Cnt.beginRegion(Builder); return EmitLValue(live); + } } llvm::BasicBlock *lhsBlock = createBasicBlock("cond.true"); @@ -2667,36 +2753,46 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) { llvm::BasicBlock *contBlock = createBasicBlock("cond.end"); ConditionalEvaluation eval(*this); - EmitBranchOnBoolExpr(condExpr, lhsBlock, rhsBlock); + EmitBranchOnBoolExpr(condExpr, lhsBlock, rhsBlock, Cnt.getCount()); // Any temporaries created here are conditional. EmitBlock(lhsBlock); + Cnt.beginRegion(Builder); eval.begin(*this); - LValue lhs = EmitLValue(expr->getTrueExpr()); + Optional<LValue> lhs = + EmitLValueOrThrowExpression(*this, expr->getTrueExpr()); eval.end(*this); - if (!lhs.isSimple()) + if (lhs && !lhs->isSimple()) return EmitUnsupportedLValue(expr, "conditional operator"); lhsBlock = Builder.GetInsertBlock(); - Builder.CreateBr(contBlock); + if (lhs) + Builder.CreateBr(contBlock); // Any temporaries created here are conditional. EmitBlock(rhsBlock); eval.begin(*this); - LValue rhs = EmitLValue(expr->getFalseExpr()); + Optional<LValue> rhs = + EmitLValueOrThrowExpression(*this, expr->getFalseExpr()); eval.end(*this); - if (!rhs.isSimple()) + if (rhs && !rhs->isSimple()) return EmitUnsupportedLValue(expr, "conditional operator"); rhsBlock = Builder.GetInsertBlock(); EmitBlock(contBlock); - llvm::PHINode *phi = Builder.CreatePHI(lhs.getAddress()->getType(), 2, - "cond-lvalue"); - phi->addIncoming(lhs.getAddress(), lhsBlock); - phi->addIncoming(rhs.getAddress(), rhsBlock); - return MakeAddrLValue(phi, expr->getType()); + if (lhs && rhs) { + llvm::PHINode *phi = Builder.CreatePHI(lhs->getAddress()->getType(), + 2, "cond-lvalue"); + phi->addIncoming(lhs->getAddress(), lhsBlock); + phi->addIncoming(rhs->getAddress(), rhsBlock); + return MakeAddrLValue(phi, expr->getType()); + } else { + assert((lhs || rhs) && + "both operands of glvalue conditional are throw-expressions?"); + return lhs ? 
*lhs : *rhs; + } } /// EmitCastLValue - Casts are never lvalues unless that cast is to a reference @@ -2744,6 +2840,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_ARCReclaimReturnedObject: case CK_ARCExtendBlockObject: case CK_CopyAndAutoreleaseBlockObject: + case CK_AddressSpaceConversion: return EmitUnsupportedLValue(E, "unexpected cast lvalue"); case CK_Dependent: @@ -2760,7 +2857,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_Dynamic: { LValue LV = EmitLValue(E->getSubExpr()); llvm::Value *V = LV.getAddress(); - const CXXDynamicCastExpr *DCE = cast<CXXDynamicCastExpr>(E); + const auto *DCE = cast<CXXDynamicCastExpr>(E); return MakeAddrLValue(EmitDynamicCast(V, DCE), E->getType()); } @@ -2776,8 +2873,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_DerivedToBase: { const RecordType *DerivedClassTy = E->getSubExpr()->getType()->getAs<RecordType>(); - CXXRecordDecl *DerivedClassDecl = - cast<CXXRecordDecl>(DerivedClassTy->getDecl()); + auto *DerivedClassDecl = cast<CXXRecordDecl>(DerivedClassTy->getDecl()); LValue LV = EmitLValue(E->getSubExpr()); llvm::Value *This = LV.getAddress(); @@ -2794,8 +2890,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { return EmitAggExprToLValue(E); case CK_BaseToDerived: { const RecordType *DerivedClassTy = E->getType()->getAs<RecordType>(); - CXXRecordDecl *DerivedClassDecl = - cast<CXXRecordDecl>(DerivedClassTy->getDecl()); + auto *DerivedClassDecl = cast<CXXRecordDecl>(DerivedClassTy->getDecl()); LValue LV = EmitLValue(E->getSubExpr()); @@ -2807,7 +2902,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { // C++11 [expr.static.cast]p2: Behavior is undefined if a downcast is // performed and the object is not of the derived type. - if (SanitizePerformTypeCheck) + if (sanitizePerformTypeCheck()) EmitTypeCheck(TCK_DowncastReference, E->getExprLoc(), Derived, E->getType()); @@ -2815,7 +2910,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { } case CK_LValueBitCast: { // This must be a reinterpret_cast (or c-style equivalent). - const ExplicitCastExpr *CE = cast<ExplicitCastExpr>(E); + const auto *CE = cast<ExplicitCastExpr>(E); LValue LV = EmitLValue(E->getSubExpr()); llvm::Value *V = Builder.CreateBitCast(LV.getAddress(), @@ -2867,6 +2962,10 @@ RValue CodeGenFunction::EmitCallExpr(const CallExpr *E, SourceLocation Loc = E->getLocStart(); // Force column info to be generated so we can differentiate // multiple call sites on the same line in the debug info. + // FIXME: This is insufficient. Two calls coming from the same macro + // expansion will still get the same line/column and break debug info. It's + // possible that LLVM can be fixed to not rely on this uniqueness, at which + // point this workaround can be removed. 
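The EmitLValueOrThrowExpression path above handles glvalue conditionals with a throw-expression arm; a short sketch of such a conditional, with hypothetical names:

    #include <stdexcept>

    // Illustrative only: per C++ [expr.cond] the conditional below is an
    // lvalue of type int even though its false arm throws. Only the true arm
    // yields an address, so no PHI is built and the surviving lvalue is
    // returned directly.
    int &pick(bool ok, int &x) {
      return ok ? x : throw std::runtime_error("no value");
    }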
const FunctionDecl* Callee = E->getDirectCallee(); bool ForceColumnInfo = Callee && Callee->isInlineSpecified(); DI->EmitLocation(Builder, Loc, ForceColumnInfo); @@ -2876,10 +2975,10 @@ RValue CodeGenFunction::EmitCallExpr(const CallExpr *E, if (E->getCallee()->getType()->isBlockPointerType()) return EmitBlockCallExpr(E, ReturnValue); - if (const CXXMemberCallExpr *CE = dyn_cast<CXXMemberCallExpr>(E)) + if (const auto *CE = dyn_cast<CXXMemberCallExpr>(E)) return EmitCXXMemberCallExpr(CE, ReturnValue); - if (const CUDAKernelCallExpr *CE = dyn_cast<CUDAKernelCallExpr>(E)) + if (const auto *CE = dyn_cast<CUDAKernelCallExpr>(E)) return EmitCUDAKernelCallExpr(CE, ReturnValue); const Decl *TargetDecl = E->getCalleeDecl(); @@ -2888,12 +2987,12 @@ RValue CodeGenFunction::EmitCallExpr(const CallExpr *E, return EmitBuiltinExpr(FD, builtinID, E); } - if (const CXXOperatorCallExpr *CE = dyn_cast<CXXOperatorCallExpr>(E)) + if (const auto *CE = dyn_cast<CXXOperatorCallExpr>(E)) if (const CXXMethodDecl *MD = dyn_cast_or_null<CXXMethodDecl>(TargetDecl)) return EmitCXXOperatorMemberCallExpr(CE, MD, ReturnValue); - if (const CXXPseudoDestructorExpr *PseudoDtor - = dyn_cast<CXXPseudoDestructorExpr>(E->getCallee()->IgnoreParens())) { + if (const auto *PseudoDtor = + dyn_cast<CXXPseudoDestructorExpr>(E->getCallee()->IgnoreParens())) { QualType DestroyedType = PseudoDtor->getDestroyedType(); if (getLangOpts().ObjCAutoRefCount && DestroyedType->isObjCLifetimeType() && @@ -2903,7 +3002,7 @@ RValue CodeGenFunction::EmitCallExpr(const CallExpr *E, // If the pseudo-expression names a retainable object with weak or // strong lifetime, the object shall be released. Expr *BaseExpr = PseudoDtor->getBase(); - llvm::Value *BaseValue = NULL; + llvm::Value *BaseValue = nullptr; Qualifiers BaseQuals; // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a scalar. @@ -2943,7 +3042,7 @@ RValue CodeGenFunction::EmitCallExpr(const CallExpr *E, EmitScalarExpr(E->getCallee()); } - return RValue::get(0); + return RValue::get(nullptr); } llvm::Value *Callee = EmitScalarExpr(E->getCallee()); @@ -3061,7 +3160,7 @@ LValue CodeGenFunction::EmitObjCMessageExprLValue(const ObjCMessageExpr *E) { if (!RV.isScalar()) return MakeAddrLValue(RV.getAggregateAddr(), E->getType()); - assert(E->getMethodDecl()->getResultType()->isReferenceType() && + assert(E->getMethodDecl()->getReturnType()->isReferenceType() && "Can't have a scalar return unless the return type is a " "reference type!"); @@ -3089,7 +3188,7 @@ LValue CodeGenFunction::EmitLValueForIvar(QualType ObjectTy, LValue CodeGenFunction::EmitObjCIvarRefLValue(const ObjCIvarRefExpr *E) { // FIXME: A lot of the code below could be shared with EmitMemberExpr. - llvm::Value *BaseValue = 0; + llvm::Value *BaseValue = nullptr; const Expr *BaseExpr = E->getBase(); Qualifiers BaseQuals; QualType ObjectTy; @@ -3131,11 +3230,15 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, llvm::Value *Callee, CalleeType = getContext().getCanonicalType(CalleeType); - const FunctionType *FnType - = cast<FunctionType>(cast<PointerType>(CalleeType)->getPointeeType()); + const auto *FnType = + cast<FunctionType>(cast<PointerType>(CalleeType)->getPointeeType()); // Force column info to differentiate multiple inlined call sites on // the same line, analoguous to EmitCallExpr. + // FIXME: This is insufficient. Two calls coming from the same macro expansion + // will still get the same line/column and break debug info. 
It's possible + // that LLVM can be fixed to not rely on this uniqueness, at which point this + // workaround can be removed. bool ForceColumnInfo = false; if (const FunctionDecl* FD = dyn_cast_or_null<const FunctionDecl>(TargetDecl)) ForceColumnInfo = FD->isInlineSpecified(); @@ -3144,6 +3247,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, llvm::Value *Callee, (!TargetDecl || !isa<FunctionDecl>(TargetDecl))) { if (llvm::Constant *PrefixSig = CGM.getTargetCodeGenInfo().getUBSanFunctionSignature(CGM)) { + SanitizerScope SanScope(this); llvm::Constant *FTRTTIConst = CGM.GetAddrOfRTTIDescriptor(QualType(FnType, 0), /*ForEH=*/true); llvm::Type *PrefixStructTyElems[] = { @@ -3231,8 +3335,8 @@ EmitPointerToDataMemberBinaryExpr(const BinaryOperator *E) { const MemberPointerType *MPT = E->getRHS()->getType()->getAs<MemberPointerType>(); - llvm::Value *AddV = - CGM.getCXXABI().EmitMemberDataPointerAddress(*this, BaseV, OffsetV, MPT); + llvm::Value *AddV = CGM.getCXXABI().EmitMemberDataPointerAddress( + *this, E, BaseV, OffsetV, MPT); return MakeAddrLValue(AddV, MPT->getPointeeType()); } @@ -3288,7 +3392,7 @@ static LValueOrRValue emitPseudoObjectExpr(CodeGenFunction &CGF, // If this semantic expression is an opaque value, bind it // to the result of its source expression. - if (const OpaqueValueExpr *ov = dyn_cast<OpaqueValueExpr>(semantic)) { + if (const auto *ov = dyn_cast<OpaqueValueExpr>(semantic)) { // If this is the result expression, we may need to evaluate // directly into the slot. diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp index 9d0f3a9661a6..4cf94c033bb4 100644 --- a/lib/CodeGen/CGExprAgg.cpp +++ b/lib/CodeGen/CGExprAgg.cpp @@ -370,6 +370,29 @@ AggExprEmitter::VisitCXXStdInitializerListExpr(CXXStdInitializerListExpr *E) { } } +/// \brief Determine if E is a trivial array filler, that is, one that is +/// equivalent to zero-initialization. +static bool isTrivialFiller(Expr *E) { + if (!E) + return true; + + if (isa<ImplicitValueInitExpr>(E)) + return true; + + if (auto *ILE = dyn_cast<InitListExpr>(E)) { + if (ILE->getNumInits()) + return false; + return isTrivialFiller(ILE->getArrayFiller()); + } + + if (auto *Cons = dyn_cast_or_null<CXXConstructExpr>(E)) + return Cons->getConstructor()->isDefaultConstructor() && + Cons->getConstructor()->isTrivial(); + + // FIXME: Are there other cases where we can avoid emitting an initializer? + return false; +} + /// \brief Emit initialization of an array from an initializer list. void AggExprEmitter::EmitArrayInit(llvm::Value *DestPtr, llvm::ArrayType *AType, QualType elementType, InitListExpr *E) { @@ -389,9 +412,9 @@ void AggExprEmitter::EmitArrayInit(llvm::Value *DestPtr, llvm::ArrayType *AType, // already-constructed members if an initializer throws. // For that, we'll need an EH cleanup. QualType::DestructionKind dtorKind = elementType.isDestructedType(); - llvm::AllocaInst *endOfInit = 0; + llvm::AllocaInst *endOfInit = nullptr; EHScopeStack::stable_iterator cleanup; - llvm::Instruction *cleanupDominator = 0; + llvm::Instruction *cleanupDominator = nullptr; if (CGF.needsEHCleanup(dtorKind)) { // In principle we could tell the cleanup where we are more // directly, but the control flow can get so varied here that it @@ -435,14 +458,8 @@ void AggExprEmitter::EmitArrayInit(llvm::Value *DestPtr, llvm::ArrayType *AType, } // Check whether there's a non-trivial array-fill expression. - // Note that this will be a CXXConstructExpr even if the element - // type is an array (or array of array, etc.) of class type. 
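// Illustration (added for exposition, not part of this patch): what isTrivialFiller
// above accepts. A filler is "trivial" when it amounts to zero-initialization, so the
// trailing elements need no per-element code:
int trivialFill[8] = {1, 2, 3};        // elements 3..7 use implicit value-init: trivial
struct NonTrivial { NonTrivial(); };   // user-provided default constructor
NonTrivial perElement[8] = {};         // filler is a non-trivial NonTrivial() call: not trivial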
Expr *filler = E->getArrayFiller(); - bool hasTrivialFiller = true; - if (CXXConstructExpr *cons = dyn_cast_or_null<CXXConstructExpr>(filler)) { - assert(cons->getConstructor()->isDefaultConstructor()); - hasTrivialFiller = cons->getConstructor()->isTrivial(); - } + bool hasTrivialFiller = isTrivialFiller(filler); // Any remaining elements need to be zero-initialized, possibly // using the filler expression. We can skip this if the we're @@ -539,7 +556,7 @@ static Expr *findPeephole(Expr *op, CastKind kind) { if (castE->getCastKind() == CK_NoOp) continue; } - return 0; + return nullptr; } } @@ -713,6 +730,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { case CK_CopyAndAutoreleaseBlockObject: case CK_BuiltinFnToFnPtr: case CK_ZeroToOCLEvent: + case CK_AddressSpaceConversion: llvm_unreachable("cast kind invalid for aggregate types"); } } @@ -891,14 +909,16 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { // Bind the common expression if necessary. CodeGenFunction::OpaqueValueMapping binding(CGF, E); + RegionCounter Cnt = CGF.getPGORegionCounter(E); CodeGenFunction::ConditionalEvaluation eval(CGF); - CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock); + CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock, Cnt.getCount()); // Save whether the destination's lifetime is externally managed. bool isExternallyDestructed = Dest.isExternallyDestructed(); eval.begin(CGF); CGF.EmitBlock(LHSBlock); + Cnt.beginRegion(Builder); Visit(E->getTrueExpr()); eval.end(CGF); @@ -928,7 +948,11 @@ void AggExprEmitter::VisitVAArgExpr(VAArgExpr *VE) { llvm::Value *ArgPtr = CGF.EmitVAArg(ArgValue, VE->getType()); if (!ArgPtr) { - CGF.ErrorUnsupported(VE, "aggregate va_arg expression"); + // If EmitVAArg fails, we fall back to the LLVM instruction. + llvm::Value *Val = + Builder.CreateVAArg(ArgValue, CGF.ConvertType(VE->getType())); + if (!Dest.isIgnored()) + Builder.CreateStore(Val, Dest.getAddr()); return; } @@ -1037,7 +1061,7 @@ AggExprEmitter::EmitInitializationToLValue(Expr *E, LValue LV) { return; case TEK_Scalar: if (LV.isSimple()) { - CGF.EmitScalarInit(E, /*D=*/0, LV, /*Captured=*/false); + CGF.EmitScalarInit(E, /*D=*/nullptr, LV, /*Captured=*/false); } else { CGF.EmitStoreThroughLValue(RValue::get(CGF.EmitScalarExpr(E)), LV); } @@ -1113,6 +1137,16 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { return; } + if (E->getType()->isAtomicType()) { + // An _Atomic(T) object can be list-initialized from an expression + // of the same type. + assert(E->getNumInits() == 1 && + CGF.getContext().hasSameUnqualifiedType(E->getInit(0)->getType(), + E->getType()) && + "unexpected list initialization for atomic object"); + return Visit(E->getInit(0)); + } + assert(E->getType()->isRecordType() && "Only support structs/unions here!"); // Do struct initialization; this code just sets each individual member @@ -1134,9 +1168,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { #ifndef NDEBUG // Make sure that it's really an empty and not a failure of // semantic analysis. - for (RecordDecl::field_iterator Field = record->field_begin(), - FieldEnd = record->field_end(); - Field != FieldEnd; ++Field) + for (const auto *Field : record->fields()) assert(Field->isUnnamedBitfield() && "Only unnamed bitfields allowed"); #endif return; @@ -1160,14 +1192,12 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { // We'll need to enter cleanup scopes in case any of the member // initializers throw an exception. 
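// Illustration (added for exposition, not part of this patch): if a later member
// initializer throws, members that were already constructed must still be destroyed;
// that is what the cleanups pushed below guarantee:
struct Member { Member(); ~Member(); };
Member makeA();
Member mayThrow();                   // may throw after 'a' has been constructed
struct Pair { Member a, b; };
Pair p = { makeA(), mayThrow() };    // on throw, p.a is destroyed by the EH cleanup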
SmallVector<EHScopeStack::stable_iterator, 16> cleanups; - llvm::Instruction *cleanupDominator = 0; + llvm::Instruction *cleanupDominator = nullptr; // Here we iterate over the fields; this makes it simpler to both // default-initialize fields and skip over unnamed fields. unsigned curInitIndex = 0; - for (RecordDecl::field_iterator field = record->field_begin(), - fieldEnd = record->field_end(); - field != fieldEnd; ++field) { + for (const auto *field : record->fields()) { // We're done once we hit the flexible array member. if (field->getType()->isIncompleteArrayType()) break; @@ -1184,7 +1214,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { break; - LValue LV = CGF.EmitLValueForFieldInitialization(DestLV, *field); + LValue LV = CGF.EmitLValueForFieldInitialization(DestLV, field); // We never generate write-barries for initialized fields. LV.setNonGC(true); @@ -1249,7 +1279,7 @@ static CharUnits GetNumNonZeroBytesInInit(const Expr *E, CodeGenFunction &CGF) { // If this is an initlist expr, sum up the size of sizes of the (present) // elements. If this is something weird, assume the whole thing is non-zero. const InitListExpr *ILE = dyn_cast<InitListExpr>(E); - if (ILE == 0 || !CGF.getTypes().isZeroInitializable(ILE->getType())) + if (!ILE || !CGF.getTypes().isZeroInitializable(ILE->getType())) return CGF.getContext().getTypeSizeInChars(E->getType()); // InitListExprs for structs have to be handled carefully. If there are @@ -1261,8 +1291,7 @@ static CharUnits GetNumNonZeroBytesInInit(const Expr *E, CodeGenFunction &CGF) { CharUnits NumNonZeroBytes = CharUnits::Zero(); unsigned ILEElement = 0; - for (RecordDecl::field_iterator Field = SD->field_begin(), - FieldEnd = SD->field_end(); Field != FieldEnd; ++Field) { + for (const auto *Field : SD->fields()) { // We're done once we hit the flexible array member or run out of // InitListExpr elements. if (Field->getType()->isIncompleteArrayType() || @@ -1299,7 +1328,8 @@ static void CheckAggExprForMemSetUse(AggValueSlot &Slot, const Expr *E, CodeGenFunction &CGF) { // If the slot is already known to be zeroed, nothing to do. Don't mess with // volatile stores. - if (Slot.isZeroed() || Slot.isVolatile() || Slot.getAddr() == 0) return; + if (Slot.isZeroed() || Slot.isVolatile() || Slot.getAddr() == nullptr) + return; // C++ objects with a user-declared constructor don't need zero'ing. if (CGF.getLangOpts().CPlusPlus) @@ -1346,7 +1376,7 @@ static void CheckAggExprForMemSetUse(AggValueSlot &Slot, const Expr *E, void CodeGenFunction::EmitAggExpr(const Expr *E, AggValueSlot Slot) { assert(E && hasAggregateEvaluationKind(E->getType()) && "Invalid aggregate expression to emit"); - assert((Slot.getAddr() != 0 || Slot.isIgnored()) && + assert((Slot.getAddr() != nullptr || Slot.isIgnored()) && "slot has bits but no address"); // Optimize the slot if possible. @@ -1466,10 +1496,10 @@ void CodeGenFunction::EmitAggregateCopy(llvm::Value *DestPtr, // memcpy, as well as the TBAA tags for the members of the struct, in case // the optimizer wishes to expand it in to scalar memory operations. 
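// Illustration (added for exposition, not part of this patch): the kind of copy that
// reaches this path -- a trivial aggregate assignment lowered to a single llvm.memcpy
// tagged with !tbaa.struct member information, so the optimizer may legally split it
// back into scalar loads and stores:
struct Packet { int id; float value; };
void copyPacket(Packet &dst, const Packet &src) { dst = src; }  // becomes a tagged memcpy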
llvm::MDNode *TBAAStructTag = CGM.getTBAAStructInfo(Ty); - + Builder.CreateMemCpy(DestPtr, SrcPtr, llvm::ConstantInt::get(IntPtrTy, TypeInfo.first.getQuantity()), alignment.getQuantity(), isVolatile, - /*TBAATag=*/0, TBAAStructTag); + /*TBAATag=*/nullptr, TBAAStructTag); } diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index cc7b24d5e83e..7aacee4d6ba1 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -18,8 +18,8 @@ #include "CGObjCRuntime.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/Frontend/CodeGenOptions.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/Support/CallSite.h" using namespace clang; using namespace CodeGen; @@ -95,7 +95,7 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE, const Expr *Base = ME->getBase(); bool CanUseVirtualCall = MD->isVirtual() && !ME->hasQualifier(); - const CXXMethodDecl *DevirtualizedMethod = NULL; + const CXXMethodDecl *DevirtualizedMethod = nullptr; if (CanUseVirtualCall && CanDevirtualizeMemberFunctionCall(Base, MD)) { const CXXRecordDecl *BestDynamicDecl = Base->getBestDynamicClassType(); DevirtualizedMethod = MD->getCorrespondingMethodInClass(BestDynamicDecl); @@ -111,7 +111,7 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE, // one or the one of the full expression, we would have to build // a derived-to-base cast to compute the correct this pointer, but // we don't have support for that yet, so do a virtual call. - DevirtualizedMethod = NULL; + DevirtualizedMethod = nullptr; } // If the return types are not the same, this might be a case where more // code needs to run to compensate for it. For example, the derived @@ -119,9 +119,9 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE, // type of MD and has a prefix. // For now we just avoid devirtualizing these covariant cases. if (DevirtualizedMethod && - DevirtualizedMethod->getResultType().getCanonicalType() != - MD->getResultType().getCanonicalType()) - DevirtualizedMethod = NULL; + DevirtualizedMethod->getReturnType().getCanonicalType() != + MD->getReturnType().getCanonicalType()) + DevirtualizedMethod = nullptr; } llvm::Value *This; @@ -132,10 +132,10 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE, if (MD->isTrivial()) { - if (isa<CXXDestructorDecl>(MD)) return RValue::get(0); + if (isa<CXXDestructorDecl>(MD)) return RValue::get(nullptr); if (isa<CXXConstructorDecl>(MD) && cast<CXXConstructorDecl>(MD)->isDefaultConstructor()) - return RValue::get(0); + return RValue::get(nullptr); if (MD->isCopyAssignmentOperator() || MD->isMoveAssignmentOperator()) { // We don't like to generate the trivial copy/move assignment operator @@ -158,7 +158,7 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE, // Compute the function type we're calling. const CXXMethodDecl *CalleeDecl = DevirtualizedMethod ? 
DevirtualizedMethod : MD; - const CGFunctionInfo *FInfo = 0; + const CGFunctionInfo *FInfo = nullptr; if (const CXXDestructorDecl *Dtor = dyn_cast<CXXDestructorDecl>(CalleeDecl)) FInfo = &CGM.getTypes().arrangeCXXDestructor(Dtor, Dtor_Complete); @@ -199,9 +199,9 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE, Callee = CGM.GetAddrOfFunction(GlobalDecl(DDtor, Dtor_Complete), Ty); } EmitCXXMemberCall(MD, CE->getExprLoc(), Callee, ReturnValue, This, - /*ImplicitParam=*/0, QualType(), 0, 0); + /*ImplicitParam=*/nullptr, QualType(), nullptr,nullptr); } - return RValue::get(0); + return RValue::get(nullptr); } if (const CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(MD)) { @@ -220,11 +220,13 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE, } } - if (MD->isVirtual()) - This = CGM.getCXXABI().adjustThisArgumentForVirtualCall(*this, MD, This); + if (MD->isVirtual()) { + This = CGM.getCXXABI().adjustThisArgumentForVirtualFunctionCall( + *this, MD, This, UseVirtualCall); + } return EmitCXXMemberCall(MD, CE->getExprLoc(), Callee, ReturnValue, This, - /*ImplicitParam=*/0, QualType(), + /*ImplicitParam=*/nullptr, QualType(), CE->arg_begin(), CE->arg_end()); } @@ -260,7 +262,7 @@ CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, // Ask the ABI to load the callee. Note that This is modified. llvm::Value *Callee = - CGM.getCXXABI().EmitLoadOfMemberFunctionPointer(*this, This, MemFnPtr, MPT); + CGM.getCXXABI().EmitLoadOfMemberFunctionPointer(*this, BO, This, MemFnPtr, MPT); CallArgList Args; @@ -297,7 +299,7 @@ CodeGenFunction::EmitCXXOperatorMemberCallExpr(const CXXOperatorCallExpr *E, llvm::Value *Callee = EmitCXXOperatorMemberCallee(E, MD, This); return EmitCXXMemberCall(MD, E->getExprLoc(), Callee, ReturnValue, This, - /*ImplicitParam=*/0, QualType(), + /*ImplicitParam=*/nullptr, QualType(), E->arg_begin() + 1, E->arg_end()); } @@ -316,7 +318,7 @@ static void EmitNullBaseClassInitialization(CodeGenFunction &CGF, const ASTRecordLayout &Layout = CGF.getContext().getASTRecordLayout(Base); CharUnits Size = Layout.getNonVirtualSize(); - CharUnits Align = Layout.getNonVirtualAlign(); + CharUnits Align = Layout.getNonVirtualAlignment(); llvm::Value *SizeVal = CGF.CGM.getSize(Size); @@ -584,7 +586,7 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF, // size := sizeWithoutCookie + cookieSize // and check whether it overflows. 
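// Sketch (added for exposition, not part of this patch) of the size computation for
// "new T[n]" described above, written with the generic overflow builtins purely for
// illustration; 'ArrayCookieSize' is a stand-in for whatever cookie the ABI requires:
#include <cstddef>
constexpr std::size_t ArrayCookieSize = sizeof(std::size_t);   // stand-in value
template <typename T> std::size_t newArraySize(std::size_t n) {
  std::size_t size = 0;
  bool overflow = __builtin_mul_overflow(n, sizeof(T), &size);
  overflow |= __builtin_add_overflow(size, ArrayCookieSize, &size);
  return overflow ? std::size_t(-1) : size;  // an all-ones size makes the allocator fail
}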
- llvm::Value *hasOverflow = 0; + llvm::Value *hasOverflow = nullptr; // If numElementsWidth > sizeWidth, then one way or another, we're // going to have to do a comparison for (2), and this happens to @@ -724,8 +726,8 @@ static void StoreAnyExprIntoOneUnit(CodeGenFunction &CGF, const Expr *Init, CharUnits Alignment = CGF.getContext().getTypeAlignInChars(AllocType); switch (CGF.getEvaluationKind(AllocType)) { case TEK_Scalar: - CGF.EmitScalarInit(Init, 0, CGF.MakeAddrLValue(NewPtr, AllocType, - Alignment), + CGF.EmitScalarInit(Init, nullptr, CGF.MakeAddrLValue(NewPtr, AllocType, + Alignment), false); return; case TEK_Complex: @@ -747,185 +749,249 @@ static void StoreAnyExprIntoOneUnit(CodeGenFunction &CGF, const Expr *Init, } void -CodeGenFunction::EmitNewArrayInitializer(const CXXNewExpr *E, - QualType elementType, - llvm::Value *beginPtr, - llvm::Value *numElements) { +CodeGenFunction::EmitNewArrayInitializer(const CXXNewExpr *E, + QualType ElementType, + llvm::Value *BeginPtr, + llvm::Value *NumElements, + llvm::Value *AllocSizeWithoutCookie) { + // If we have a type with trivial initialization and no initializer, + // there's nothing to do. if (!E->hasInitializer()) - return; // We have a POD type. + return; - llvm::Value *explicitPtr = beginPtr; - // Find the end of the array, hoisted out of the loop. - llvm::Value *endPtr = - Builder.CreateInBoundsGEP(beginPtr, numElements, "array.end"); + llvm::Value *CurPtr = BeginPtr; - unsigned initializerElements = 0; + unsigned InitListElements = 0; const Expr *Init = E->getInitializer(); - llvm::AllocaInst *endOfInit = 0; - QualType::DestructionKind dtorKind = elementType.isDestructedType(); - EHScopeStack::stable_iterator cleanup; - llvm::Instruction *cleanupDominator = 0; + llvm::AllocaInst *EndOfInit = nullptr; + QualType::DestructionKind DtorKind = ElementType.isDestructedType(); + EHScopeStack::stable_iterator Cleanup; + llvm::Instruction *CleanupDominator = nullptr; // If the initializer is an initializer list, first do the explicit elements. if (const InitListExpr *ILE = dyn_cast<InitListExpr>(Init)) { - initializerElements = ILE->getNumInits(); + InitListElements = ILE->getNumInits(); // If this is a multi-dimensional array new, we will initialize multiple // elements with each init list element. QualType AllocType = E->getAllocatedType(); if (const ConstantArrayType *CAT = dyn_cast_or_null<ConstantArrayType>( AllocType->getAsArrayTypeUnsafe())) { - unsigned AS = explicitPtr->getType()->getPointerAddressSpace(); + unsigned AS = CurPtr->getType()->getPointerAddressSpace(); llvm::Type *AllocPtrTy = ConvertTypeForMem(AllocType)->getPointerTo(AS); - explicitPtr = Builder.CreateBitCast(explicitPtr, AllocPtrTy); - initializerElements *= getContext().getConstantArrayElementCount(CAT); + CurPtr = Builder.CreateBitCast(CurPtr, AllocPtrTy); + InitListElements *= getContext().getConstantArrayElementCount(CAT); } - // Enter a partial-destruction cleanup if necessary. - if (needsEHCleanup(dtorKind)) { - // In principle we could tell the cleanup where we are more + // Enter a partial-destruction Cleanup if necessary. + if (needsEHCleanup(DtorKind)) { + // In principle we could tell the Cleanup where we are more // directly, but the control flow can get so varied here that it // would actually be quite complex. Therefore we go through an // alloca. 
- endOfInit = CreateTempAlloca(beginPtr->getType(), "array.endOfInit"); - cleanupDominator = Builder.CreateStore(beginPtr, endOfInit); - pushIrregularPartialArrayCleanup(beginPtr, endOfInit, elementType, - getDestroyer(dtorKind)); - cleanup = EHStack.stable_begin(); + EndOfInit = CreateTempAlloca(BeginPtr->getType(), "array.init.end"); + CleanupDominator = Builder.CreateStore(BeginPtr, EndOfInit); + pushIrregularPartialArrayCleanup(BeginPtr, EndOfInit, ElementType, + getDestroyer(DtorKind)); + Cleanup = EHStack.stable_begin(); } for (unsigned i = 0, e = ILE->getNumInits(); i != e; ++i) { // Tell the cleanup that it needs to destroy up to this // element. TODO: some of these stores can be trivially // observed to be unnecessary. - if (endOfInit) Builder.CreateStore(explicitPtr, endOfInit); + if (EndOfInit) + Builder.CreateStore(Builder.CreateBitCast(CurPtr, BeginPtr->getType()), + EndOfInit); + // FIXME: If the last initializer is an incomplete initializer list for + // an array, and we have an array filler, we can fold together the two + // initialization loops. StoreAnyExprIntoOneUnit(*this, ILE->getInit(i), - ILE->getInit(i)->getType(), explicitPtr); - explicitPtr = Builder.CreateConstGEP1_32(explicitPtr, 1, - "array.exp.next"); + ILE->getInit(i)->getType(), CurPtr); + CurPtr = Builder.CreateConstInBoundsGEP1_32(CurPtr, 1, "array.exp.next"); } // The remaining elements are filled with the array filler expression. Init = ILE->getArrayFiller(); - explicitPtr = Builder.CreateBitCast(explicitPtr, beginPtr->getType()); + // Extract the initializer for the individual array elements by pulling + // out the array filler from all the nested initializer lists. This avoids + // generating a nested loop for the initialization. + while (Init && Init->getType()->isConstantArrayType()) { + auto *SubILE = dyn_cast<InitListExpr>(Init); + if (!SubILE) + break; + assert(SubILE->getNumInits() == 0 && "explicit inits in array filler?"); + Init = SubILE->getArrayFiller(); + } + + // Switch back to initializing one base element at a time. + CurPtr = Builder.CreateBitCast(CurPtr, BeginPtr->getType()); } - // Create the continuation block. - llvm::BasicBlock *contBB = createBasicBlock("new.loop.end"); + // Attempt to perform zero-initialization using memset. + auto TryMemsetInitialization = [&]() -> bool { + // FIXME: If the type is a pointer-to-data-member under the Itanium ABI, + // we can initialize with a memset to -1. + if (!CGM.getTypes().isZeroInitializable(ElementType)) + return false; + + // Optimization: since zero initialization will just set the memory + // to all zeroes, generate a single memset to do it in one shot. + + // Subtract out the size of any elements we've already initialized. + auto *RemainingSize = AllocSizeWithoutCookie; + if (InitListElements) { + // We know this can't overflow; we check this when doing the allocation. + auto *InitializedSize = llvm::ConstantInt::get( + RemainingSize->getType(), + getContext().getTypeSizeInChars(ElementType).getQuantity() * + InitListElements); + RemainingSize = Builder.CreateSub(RemainingSize, InitializedSize); + } + + // Create the memset. + CharUnits Alignment = getContext().getTypeAlignInChars(ElementType); + Builder.CreateMemSet(CurPtr, Builder.getInt8(0), RemainingSize, + Alignment.getQuantity(), false); + return true; + }; + + // If all elements have already been initialized, skip any further + // initialization. 
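// Illustration (added for exposition, not part of this patch): with an explicit
// initializer list, only the listed elements are stored individually; the remaining
// elements are handled by the TryMemsetInitialization helper above as one memset:
void fillInts(unsigned n) {
  int *p = new int[n]{1, 2, 3};   // three stores, then one memset of (n - 3) * sizeof(int) bytes
  delete[] p;
}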
+ llvm::ConstantInt *ConstNum = dyn_cast<llvm::ConstantInt>(NumElements); + if (ConstNum && ConstNum->getZExtValue() <= InitListElements) { + // If there was a Cleanup, deactivate it. + if (CleanupDominator) + DeactivateCleanupBlock(Cleanup, CleanupDominator); + return; + } + + assert(Init && "have trailing elements to initialize but no initializer"); + + // If this is a constructor call, try to optimize it out, and failing that + // emit a single loop to initialize all remaining elements. + if (const CXXConstructExpr *CCE = dyn_cast<CXXConstructExpr>(Init)) { + CXXConstructorDecl *Ctor = CCE->getConstructor(); + if (Ctor->isTrivial()) { + // If new expression did not specify value-initialization, then there + // is no initialization. + if (!CCE->requiresZeroInitialization() || Ctor->getParent()->isEmpty()) + return; + + if (TryMemsetInitialization()) + return; + } + + // Store the new Cleanup position for irregular Cleanups. + // + // FIXME: Share this cleanup with the constructor call emission rather than + // having it create a cleanup of its own. + if (EndOfInit) Builder.CreateStore(CurPtr, EndOfInit); + + // Emit a constructor call loop to initialize the remaining elements. + if (InitListElements) + NumElements = Builder.CreateSub( + NumElements, + llvm::ConstantInt::get(NumElements->getType(), InitListElements)); + EmitCXXAggrConstructorCall(Ctor, NumElements, CurPtr, + CCE->arg_begin(), CCE->arg_end(), + CCE->requiresZeroInitialization()); + return; + } + + // If this is value-initialization, we can usually use memset. + ImplicitValueInitExpr IVIE(ElementType); + if (isa<ImplicitValueInitExpr>(Init)) { + if (TryMemsetInitialization()) + return; + + // Switch to an ImplicitValueInitExpr for the element type. This handles + // only one case: multidimensional array new of pointers to members. In + // all other cases, we already have an initializer for the array element. + Init = &IVIE; + } + + // At this point we should have found an initializer for the individual + // elements of the array. + assert(getContext().hasSameUnqualifiedType(ElementType, Init->getType()) && + "got wrong type of element to initialize"); + + // If we have an empty initializer list, we can usually use memset. + if (auto *ILE = dyn_cast<InitListExpr>(Init)) + if (ILE->getNumInits() == 0 && TryMemsetInitialization()) + return; + + // Create the loop blocks. + llvm::BasicBlock *EntryBB = Builder.GetInsertBlock(); + llvm::BasicBlock *LoopBB = createBasicBlock("new.loop"); + llvm::BasicBlock *ContBB = createBasicBlock("new.loop.end"); + + // Find the end of the array, hoisted out of the loop. + llvm::Value *EndPtr = + Builder.CreateInBoundsGEP(BeginPtr, NumElements, "array.end"); // If the number of elements isn't constant, we have to now check if there is // anything left to initialize. - if (llvm::ConstantInt *constNum = dyn_cast<llvm::ConstantInt>(numElements)) { - // If all elements have already been initialized, skip the whole loop. - if (constNum->getZExtValue() <= initializerElements) { - // If there was a cleanup, deactivate it. - if (cleanupDominator) - DeactivateCleanupBlock(cleanup, cleanupDominator); - return; - } - } else { - llvm::BasicBlock *nonEmptyBB = createBasicBlock("new.loop.nonempty"); - llvm::Value *isEmpty = Builder.CreateICmpEQ(explicitPtr, endPtr, + if (!ConstNum) { + llvm::Value *IsEmpty = Builder.CreateICmpEQ(CurPtr, EndPtr, "array.isempty"); - Builder.CreateCondBr(isEmpty, contBB, nonEmptyBB); - EmitBlock(nonEmptyBB); + Builder.CreateCondBr(IsEmpty, ContBB, LoopBB); } // Enter the loop. 
- llvm::BasicBlock *entryBB = Builder.GetInsertBlock(); - llvm::BasicBlock *loopBB = createBasicBlock("new.loop"); - - EmitBlock(loopBB); + EmitBlock(LoopBB); // Set up the current-element phi. - llvm::PHINode *curPtr = - Builder.CreatePHI(explicitPtr->getType(), 2, "array.cur"); - curPtr->addIncoming(explicitPtr, entryBB); - - // Store the new cleanup position for irregular cleanups. - if (endOfInit) Builder.CreateStore(curPtr, endOfInit); - - // Enter a partial-destruction cleanup if necessary. - if (!cleanupDominator && needsEHCleanup(dtorKind)) { - pushRegularPartialArrayCleanup(beginPtr, curPtr, elementType, - getDestroyer(dtorKind)); - cleanup = EHStack.stable_begin(); - cleanupDominator = Builder.CreateUnreachable(); + llvm::PHINode *CurPtrPhi = + Builder.CreatePHI(CurPtr->getType(), 2, "array.cur"); + CurPtrPhi->addIncoming(CurPtr, EntryBB); + CurPtr = CurPtrPhi; + + // Store the new Cleanup position for irregular Cleanups. + if (EndOfInit) Builder.CreateStore(CurPtr, EndOfInit); + + // Enter a partial-destruction Cleanup if necessary. + if (!CleanupDominator && needsEHCleanup(DtorKind)) { + pushRegularPartialArrayCleanup(BeginPtr, CurPtr, ElementType, + getDestroyer(DtorKind)); + Cleanup = EHStack.stable_begin(); + CleanupDominator = Builder.CreateUnreachable(); } // Emit the initializer into this element. - StoreAnyExprIntoOneUnit(*this, Init, E->getAllocatedType(), curPtr); + StoreAnyExprIntoOneUnit(*this, Init, Init->getType(), CurPtr); - // Leave the cleanup if we entered one. - if (cleanupDominator) { - DeactivateCleanupBlock(cleanup, cleanupDominator); - cleanupDominator->eraseFromParent(); + // Leave the Cleanup if we entered one. + if (CleanupDominator) { + DeactivateCleanupBlock(Cleanup, CleanupDominator); + CleanupDominator->eraseFromParent(); } - // Advance to the next element. - llvm::Value *nextPtr = Builder.CreateConstGEP1_32(curPtr, 1, "array.next"); + // Advance to the next element by adjusting the pointer type as necessary. + llvm::Value *NextPtr = + Builder.CreateConstInBoundsGEP1_32(CurPtr, 1, "array.next"); // Check whether we've gotten to the end of the array and, if so, // exit the loop. - llvm::Value *isEnd = Builder.CreateICmpEQ(nextPtr, endPtr, "array.atend"); - Builder.CreateCondBr(isEnd, contBB, loopBB); - curPtr->addIncoming(nextPtr, Builder.GetInsertBlock()); + llvm::Value *IsEnd = Builder.CreateICmpEQ(NextPtr, EndPtr, "array.atend"); + Builder.CreateCondBr(IsEnd, ContBB, LoopBB); + CurPtrPhi->addIncoming(NextPtr, Builder.GetInsertBlock()); - EmitBlock(contBB); + EmitBlock(ContBB); } -static void EmitZeroMemSet(CodeGenFunction &CGF, QualType T, - llvm::Value *NewPtr, llvm::Value *Size) { - CGF.EmitCastToVoidPtr(NewPtr); - CharUnits Alignment = CGF.getContext().getTypeAlignInChars(T); - CGF.Builder.CreateMemSet(NewPtr, CGF.Builder.getInt8(0), Size, - Alignment.getQuantity(), false); -} - static void EmitNewInitializer(CodeGenFunction &CGF, const CXXNewExpr *E, QualType ElementType, llvm::Value *NewPtr, llvm::Value *NumElements, llvm::Value *AllocSizeWithoutCookie) { - const Expr *Init = E->getInitializer(); - if (E->isArray()) { - if (const CXXConstructExpr *CCE = dyn_cast_or_null<CXXConstructExpr>(Init)){ - CXXConstructorDecl *Ctor = CCE->getConstructor(); - if (Ctor->isTrivial()) { - // If new expression did not specify value-initialization, then there - // is no initialization. 
- if (!CCE->requiresZeroInitialization() || Ctor->getParent()->isEmpty()) - return; - - if (CGF.CGM.getTypes().isZeroInitializable(ElementType)) { - // Optimization: since zero initialization will just set the memory - // to all zeroes, generate a single memset to do it in one shot. - EmitZeroMemSet(CGF, ElementType, NewPtr, AllocSizeWithoutCookie); - return; - } - } - - CGF.EmitCXXAggrConstructorCall(Ctor, NumElements, NewPtr, - CCE->arg_begin(), CCE->arg_end(), - CCE->requiresZeroInitialization()); - return; - } else if (Init && isa<ImplicitValueInitExpr>(Init) && - CGF.CGM.getTypes().isZeroInitializable(ElementType)) { - // Optimization: since zero initialization will just set the memory - // to all zeroes, generate a single memset to do it in one shot. - EmitZeroMemSet(CGF, ElementType, NewPtr, AllocSizeWithoutCookie); - return; - } - CGF.EmitNewArrayInitializer(E, ElementType, NewPtr, NumElements); - return; - } - - if (!Init) - return; - - StoreAnyExprIntoOneUnit(CGF, Init, E->getAllocatedType(), NewPtr); + if (E->isArray()) + CGF.EmitNewArrayInitializer(E, ElementType, NewPtr, NumElements, + AllocSizeWithoutCookie); + else if (const Expr *Init = E->getInitializer()) + StoreAnyExprIntoOneUnit(CGF, Init, E->getAllocatedType(), NewPtr); } /// Emit a call to an operator new or operator delete function, as implicitly @@ -963,6 +1029,24 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF, return RV; } +RValue CodeGenFunction::EmitBuiltinNewDeleteCall(const FunctionProtoType *Type, + const Expr *Arg, + bool IsDelete) { + CallArgList Args; + const Stmt *ArgS = Arg; + EmitCallArgs(Args, *Type->param_type_begin(), + ConstExprIterator(&ArgS), ConstExprIterator(&ArgS + 1)); + // Find the allocation or deallocation function that we're calling. + ASTContext &Ctx = getContext(); + DeclarationName Name = Ctx.DeclarationNames + .getCXXOperatorName(IsDelete ? OO_Delete : OO_New); + for (auto *Decl : Ctx.getTranslationUnitDecl()->lookup(Name)) + if (auto *FD = dyn_cast<FunctionDecl>(Decl)) + if (Ctx.hasSameType(FD->getType(), QualType(Type, 0))) + return EmitNewDeleteCall(*this, cast<FunctionDecl>(Decl), Type, Args); + llvm_unreachable("predeclared global operator new/delete is missing"); +} + namespace { /// A cleanup to call the given 'operator delete' function upon /// abnormal exit from a new expression. @@ -991,20 +1075,20 @@ namespace { getPlacementArgs()[I] = Arg; } - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { const FunctionProtoType *FPT = OperatorDelete->getType()->getAs<FunctionProtoType>(); - assert(FPT->getNumArgs() == NumPlacementArgs + 1 || - (FPT->getNumArgs() == 2 && NumPlacementArgs == 0)); + assert(FPT->getNumParams() == NumPlacementArgs + 1 || + (FPT->getNumParams() == 2 && NumPlacementArgs == 0)); CallArgList DeleteArgs; // The first argument is always a void*. - FunctionProtoType::arg_type_iterator AI = FPT->arg_type_begin(); + FunctionProtoType::param_type_iterator AI = FPT->param_type_begin(); DeleteArgs.add(RValue::get(Ptr), *AI++); // A member 'operator delete' can take an extra 'size_t' argument. - if (FPT->getNumArgs() == NumPlacementArgs + 2) + if (FPT->getNumParams() == NumPlacementArgs + 2) DeleteArgs.add(RValue::get(AllocSize), *AI++); // Pass the rest of the arguments, which must match exactly. 
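// Note (added for exposition, not part of this patch): EmitBuiltinNewDeleteCall above
// appears to back the __builtin_operator_new / __builtin_operator_delete builtins,
// which call the predeclared global allocation functions directly, with no
// constructor or destructor involved:
void *allocateBuffer() {
  return __builtin_operator_new(32);   // calls ::operator new(32)
}
void releaseBuffer(void *raw) {
  __builtin_operator_delete(raw);      // calls ::operator delete(raw)
}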
@@ -1046,20 +1130,20 @@ namespace { getPlacementArgs()[I] = Arg; } - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { const FunctionProtoType *FPT = OperatorDelete->getType()->getAs<FunctionProtoType>(); - assert(FPT->getNumArgs() == NumPlacementArgs + 1 || - (FPT->getNumArgs() == 2 && NumPlacementArgs == 0)); + assert(FPT->getNumParams() == NumPlacementArgs + 1 || + (FPT->getNumParams() == 2 && NumPlacementArgs == 0)); CallArgList DeleteArgs; // The first argument is always a void*. - FunctionProtoType::arg_type_iterator AI = FPT->arg_type_begin(); + FunctionProtoType::param_type_iterator AI = FPT->param_type_begin(); DeleteArgs.add(Ptr.restore(CGF), *AI++); // A member 'operator delete' can take an extra 'size_t' argument. - if (FPT->getNumArgs() == NumPlacementArgs + 2) { + if (FPT->getNumParams() == NumPlacementArgs + 2) { RValue RV = AllocSize.restore(CGF); DeleteArgs.add(RV, *AI++); } @@ -1137,43 +1221,20 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { minElements = ILE->getNumInits(); } - llvm::Value *numElements = 0; - llvm::Value *allocSizeWithoutCookie = 0; + llvm::Value *numElements = nullptr; + llvm::Value *allocSizeWithoutCookie = nullptr; llvm::Value *allocSize = EmitCXXNewAllocSize(*this, E, minElements, numElements, allocSizeWithoutCookie); allocatorArgs.add(RValue::get(allocSize), sizeType); - // Emit the rest of the arguments. - // FIXME: Ideally, this should just use EmitCallArgs. - CXXNewExpr::const_arg_iterator placementArg = E->placement_arg_begin(); - - // First, use the types from the function type. // We start at 1 here because the first argument (the allocation size) // has already been emitted. - for (unsigned i = 1, e = allocatorType->getNumArgs(); i != e; - ++i, ++placementArg) { - QualType argType = allocatorType->getArgType(i); - - assert(getContext().hasSameUnqualifiedType(argType.getNonReferenceType(), - placementArg->getType()) && - "type mismatch in call argument!"); - - EmitCallArg(allocatorArgs, *placementArg, argType); - } - - // Either we've emitted all the call args, or we have a call to a - // variadic function. - assert((placementArg == E->placement_arg_end() || - allocatorType->isVariadic()) && - "Extra arguments to non-variadic function!"); - - // If we still have any arguments, emit them using the type of the argument. - for (CXXNewExpr::const_arg_iterator placementArgsEnd = E->placement_arg_end(); - placementArg != placementArgsEnd; ++placementArg) { - EmitCallArg(allocatorArgs, *placementArg, placementArg->getType()); - } + EmitCallArgs(allocatorArgs, allocatorType->isVariadic(), + allocatorType->param_type_begin() + 1, + allocatorType->param_type_end(), E->placement_arg_begin(), + E->placement_arg_end()); // Emit the allocation call. If the allocator is a global placement // operator, just "inline" it directly. @@ -1195,8 +1256,8 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { bool nullCheck = allocatorType->isNothrow(getContext()) && (!allocType.isPODType(getContext()) || E->hasInitializer()); - llvm::BasicBlock *nullCheckBB = 0; - llvm::BasicBlock *contBB = 0; + llvm::BasicBlock *nullCheckBB = nullptr; + llvm::BasicBlock *contBB = nullptr; llvm::Value *allocation = RV.getScalarVal(); unsigned AS = allocation->getType()->getPointerAddressSpace(); @@ -1220,7 +1281,7 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { // If there's an operator delete, enter a cleanup to call it if an // exception is thrown. 
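// Illustration (added for exposition, not part of this patch): the two guards that
// surround a new-expression's initializer. A non-throwing allocator may return null,
// so the constructor call is skipped in that case; and if the constructor throws,
// the cleanup entered below calls the matching operator delete:
#include <new>
struct Widget { Widget(int); };           // the constructor may throw
Widget *tryMake() {
  return new (std::nothrow) Widget(42);   // null-checked allocation; Widget(42) is
                                          // guarded by an operator delete cleanup
}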
EHScopeStack::stable_iterator operatorDeleteCleanup; - llvm::Instruction *cleanupDominator = 0; + llvm::Instruction *cleanupDominator = nullptr; if (E->getOperatorDelete() && !E->getOperatorDelete()->isReservedGlobalPlacementOperator()) { EnterNewDeleteCleanup(*this, E, allocation, allocSize, allocatorArgs); @@ -1287,16 +1348,16 @@ void CodeGenFunction::EmitDeleteCall(const FunctionDecl *DeleteFD, CallArgList DeleteArgs; // Check if we need to pass the size to the delete operator. - llvm::Value *Size = 0; + llvm::Value *Size = nullptr; QualType SizeTy; - if (DeleteFTy->getNumArgs() == 2) { - SizeTy = DeleteFTy->getArgType(1); + if (DeleteFTy->getNumParams() == 2) { + SizeTy = DeleteFTy->getParamType(1); CharUnits DeleteTypeSize = getContext().getTypeSizeInChars(DeleteTy); Size = llvm::ConstantInt::get(ConvertType(SizeTy), DeleteTypeSize.getQuantity()); } - - QualType ArgTy = DeleteFTy->getArgType(0); + + QualType ArgTy = DeleteFTy->getParamType(0); llvm::Value *DeletePtr = Builder.CreateBitCast(Ptr, ConvertType(ArgTy)); DeleteArgs.add(RValue::get(DeletePtr), ArgTy); @@ -1319,7 +1380,7 @@ namespace { QualType ElementType) : Ptr(Ptr), OperatorDelete(OperatorDelete), ElementType(ElementType) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { CGF.EmitDeleteCall(OperatorDelete, Ptr, ElementType); } }; @@ -1333,7 +1394,7 @@ static void EmitObjectDelete(CodeGenFunction &CGF, bool UseGlobalDelete) { // Find the destructor for the type, if applicable. If the // destructor is virtual, we'll just emit the vcall and return. - const CXXDestructorDecl *Dtor = 0; + const CXXDestructorDecl *Dtor = nullptr; if (const RecordType *RT = ElementType->getAs<RecordType>()) { CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl()); if (RD->hasDefinition() && !RD->hasTrivialDestructor()) { @@ -1422,22 +1483,22 @@ namespace { : Ptr(Ptr), OperatorDelete(OperatorDelete), NumElements(NumElements), ElementType(ElementType), CookieSize(CookieSize) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { const FunctionProtoType *DeleteFTy = OperatorDelete->getType()->getAs<FunctionProtoType>(); - assert(DeleteFTy->getNumArgs() == 1 || DeleteFTy->getNumArgs() == 2); + assert(DeleteFTy->getNumParams() == 1 || DeleteFTy->getNumParams() == 2); CallArgList Args; // Pass the pointer as the first argument. - QualType VoidPtrTy = DeleteFTy->getArgType(0); + QualType VoidPtrTy = DeleteFTy->getParamType(0); llvm::Value *DeletePtr = CGF.Builder.CreateBitCast(Ptr, CGF.ConvertType(VoidPtrTy)); Args.add(RValue::get(DeletePtr), VoidPtrTy); // Pass the original requested size as the second argument. 
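// Illustration (added for exposition, not part of this patch): when the selected
// deallocation function takes a second size_t parameter, the cleanup above passes
// the original allocation size (element count times element size, plus any array
// cookie the ABI added):
#include <cstddef>
struct Tracked {
  void operator delete[](void *p, std::size_t bytes);  // receives the full size
};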
- if (DeleteFTy->getNumArgs() == 2) { - QualType size_t = DeleteFTy->getArgType(1); + if (DeleteFTy->getNumParams() == 2) { + QualType size_t = DeleteFTy->getParamType(1); llvm::IntegerType *SizeTy = cast<llvm::IntegerType>(CGF.ConvertType(size_t)); @@ -1470,8 +1531,8 @@ static void EmitArrayDelete(CodeGenFunction &CGF, const CXXDeleteExpr *E, llvm::Value *deletedPtr, QualType elementType) { - llvm::Value *numElements = 0; - llvm::Value *allocatedPtr = 0; + llvm::Value *numElements = nullptr; + llvm::Value *allocatedPtr = nullptr; CharUnits cookieSize; CGF.CGM.getCXXABI().ReadArrayCookie(CGF, deletedPtr, E, elementType, numElements, allocatedPtr, cookieSize); @@ -1554,21 +1615,39 @@ void CodeGenFunction::EmitCXXDeleteExpr(const CXXDeleteExpr *E) { EmitBlock(DeleteEnd); } -static llvm::Constant *getBadTypeidFn(CodeGenFunction &CGF) { - // void __cxa_bad_typeid(); - llvm::FunctionType *FTy = llvm::FunctionType::get(CGF.VoidTy, false); - - return CGF.CGM.CreateRuntimeFunction(FTy, "__cxa_bad_typeid"); -} +static bool isGLValueFromPointerDeref(const Expr *E) { + E = E->IgnoreParens(); + + if (const auto *CE = dyn_cast<CastExpr>(E)) { + if (!CE->getSubExpr()->isGLValue()) + return false; + return isGLValueFromPointerDeref(CE->getSubExpr()); + } + + if (const auto *OVE = dyn_cast<OpaqueValueExpr>(E)) + return isGLValueFromPointerDeref(OVE->getSourceExpr()); + + if (const auto *BO = dyn_cast<BinaryOperator>(E)) + if (BO->getOpcode() == BO_Comma) + return isGLValueFromPointerDeref(BO->getRHS()); + + if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(E)) + return isGLValueFromPointerDeref(ACO->getTrueExpr()) || + isGLValueFromPointerDeref(ACO->getFalseExpr()); + + // C++11 [expr.sub]p1: + // The expression E1[E2] is identical (by definition) to *((E1)+(E2)) + if (isa<ArraySubscriptExpr>(E)) + return true; -static void EmitBadTypeidCall(CodeGenFunction &CGF) { - llvm::Value *Fn = getBadTypeidFn(CGF); - CGF.EmitRuntimeCallOrInvoke(Fn).setDoesNotReturn(); - CGF.Builder.CreateUnreachable(); + if (const auto *UO = dyn_cast<UnaryOperator>(E)) + if (UO->getOpcode() == UO_Deref) + return true; + + return false; } -static llvm::Value *EmitTypeidFromVTable(CodeGenFunction &CGF, - const Expr *E, +static llvm::Value *EmitTypeidFromVTable(CodeGenFunction &CGF, const Expr *E, llvm::Type *StdTypeInfoPtrTy) { // Get the vtable pointer. llvm::Value *ThisPtr = CGF.EmitLValue(E).getAddress(); @@ -1577,28 +1656,27 @@ static llvm::Value *EmitTypeidFromVTable(CodeGenFunction &CGF, // If the glvalue expression is obtained by applying the unary * operator to // a pointer and the pointer is a null pointer value, the typeid expression // throws the std::bad_typeid exception. - if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(E->IgnoreParens())) { - if (UO->getOpcode() == UO_Deref) { - llvm::BasicBlock *BadTypeidBlock = + // + // However, this paragraph's intent is not clear. We choose a very generous + // interpretation which implores us to consider comma operators, conditional + // operators, parentheses and other such constructs. 
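// Illustration (added for exposition, not part of this patch): operands that
// isGLValueFromPointerDeref treats as obtained from a pointer dereference, and which
// therefore get the null check and bad_typeid path; a plain object operand does not:
#include <typeinfo>
struct Base { virtual ~Base(); };
void probe(Base *p, Base *q, bool c, Base &obj) {
  (void)typeid(*p);              // dereference: null-checked, may throw std::bad_typeid
  (void)typeid(p[0]);            // E1[E2] is *((E1)+(E2)): treated the same way
  (void)typeid(c ? *p : *q);     // conditional with dereferenced arms: also null-checked
  (void)typeid(obj);             // no dereference involved: no null check emitted
}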
+ QualType SrcRecordTy = E->getType(); + if (CGF.CGM.getCXXABI().shouldTypeidBeNullChecked( + isGLValueFromPointerDeref(E), SrcRecordTy)) { + llvm::BasicBlock *BadTypeidBlock = CGF.createBasicBlock("typeid.bad_typeid"); - llvm::BasicBlock *EndBlock = - CGF.createBasicBlock("typeid.end"); + llvm::BasicBlock *EndBlock = CGF.createBasicBlock("typeid.end"); - llvm::Value *IsNull = CGF.Builder.CreateIsNull(ThisPtr); - CGF.Builder.CreateCondBr(IsNull, BadTypeidBlock, EndBlock); + llvm::Value *IsNull = CGF.Builder.CreateIsNull(ThisPtr); + CGF.Builder.CreateCondBr(IsNull, BadTypeidBlock, EndBlock); - CGF.EmitBlock(BadTypeidBlock); - EmitBadTypeidCall(CGF); - CGF.EmitBlock(EndBlock); - } + CGF.EmitBlock(BadTypeidBlock); + CGF.CGM.getCXXABI().EmitBadTypeidCall(CGF); + CGF.EmitBlock(EndBlock); } - llvm::Value *Value = CGF.GetVTablePtr(ThisPtr, - StdTypeInfoPtrTy->getPointerTo()); - - // Load the type info. - Value = CGF.Builder.CreateConstInBoundsGEP1_64(Value, -1ULL); - return CGF.Builder.CreateLoad(Value); + return CGF.CGM.getCXXABI().EmitTypeid(CGF, SrcRecordTy, ThisPtr, + StdTypeInfoPtrTy); } llvm::Value *CodeGenFunction::EmitCXXTypeidExpr(const CXXTypeidExpr *E) { @@ -1625,173 +1703,6 @@ llvm::Value *CodeGenFunction::EmitCXXTypeidExpr(const CXXTypeidExpr *E) { StdTypeInfoPtrTy); } -static llvm::Constant *getDynamicCastFn(CodeGenFunction &CGF) { - // void *__dynamic_cast(const void *sub, - // const abi::__class_type_info *src, - // const abi::__class_type_info *dst, - // std::ptrdiff_t src2dst_offset); - - llvm::Type *Int8PtrTy = CGF.Int8PtrTy; - llvm::Type *PtrDiffTy = - CGF.ConvertType(CGF.getContext().getPointerDiffType()); - - llvm::Type *Args[4] = { Int8PtrTy, Int8PtrTy, Int8PtrTy, PtrDiffTy }; - - llvm::FunctionType *FTy = llvm::FunctionType::get(Int8PtrTy, Args, false); - - // Mark the function as nounwind readonly. - llvm::Attribute::AttrKind FuncAttrs[] = { llvm::Attribute::NoUnwind, - llvm::Attribute::ReadOnly }; - llvm::AttributeSet Attrs = llvm::AttributeSet::get( - CGF.getLLVMContext(), llvm::AttributeSet::FunctionIndex, FuncAttrs); - - return CGF.CGM.CreateRuntimeFunction(FTy, "__dynamic_cast", Attrs); -} - -static llvm::Constant *getBadCastFn(CodeGenFunction &CGF) { - // void __cxa_bad_cast(); - llvm::FunctionType *FTy = llvm::FunctionType::get(CGF.VoidTy, false); - return CGF.CGM.CreateRuntimeFunction(FTy, "__cxa_bad_cast"); -} - -static void EmitBadCastCall(CodeGenFunction &CGF) { - llvm::Value *Fn = getBadCastFn(CGF); - CGF.EmitRuntimeCallOrInvoke(Fn).setDoesNotReturn(); - CGF.Builder.CreateUnreachable(); -} - -/// \brief Compute the src2dst_offset hint as described in the -/// Itanium C++ ABI [2.9.7] -static CharUnits computeOffsetHint(ASTContext &Context, - const CXXRecordDecl *Src, - const CXXRecordDecl *Dst) { - CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true, - /*DetectVirtual=*/false); - - // If Dst is not derived from Src we can skip the whole computation below and - // return that Src is not a public base of Dst. Record all inheritance paths. - if (!Dst->isDerivedFrom(Src, Paths)) - return CharUnits::fromQuantity(-2ULL); - - unsigned NumPublicPaths = 0; - CharUnits Offset; - - // Now walk all possible inheritance paths. - for (CXXBasePaths::paths_iterator I = Paths.begin(), E = Paths.end(); - I != E; ++I) { - if (I->Access != AS_public) // Ignore non-public inheritance. - continue; - - ++NumPublicPaths; - - for (CXXBasePath::iterator J = I->begin(), JE = I->end(); J != JE; ++J) { - // If the path contains a virtual base class we can't give any hint. 
- // -1: no hint. - if (J->Base->isVirtual()) - return CharUnits::fromQuantity(-1ULL); - - if (NumPublicPaths > 1) // Won't use offsets, skip computation. - continue; - - // Accumulate the base class offsets. - const ASTRecordLayout &L = Context.getASTRecordLayout(J->Class); - Offset += L.getBaseClassOffset(J->Base->getType()->getAsCXXRecordDecl()); - } - } - - // -2: Src is not a public base of Dst. - if (NumPublicPaths == 0) - return CharUnits::fromQuantity(-2ULL); - - // -3: Src is a multiple public base type but never a virtual base type. - if (NumPublicPaths > 1) - return CharUnits::fromQuantity(-3ULL); - - // Otherwise, the Src type is a unique public nonvirtual base type of Dst. - // Return the offset of Src from the origin of Dst. - return Offset; -} - -static llvm::Value * -EmitDynamicCastCall(CodeGenFunction &CGF, llvm::Value *Value, - QualType SrcTy, QualType DestTy, - llvm::BasicBlock *CastEnd) { - llvm::Type *PtrDiffLTy = - CGF.ConvertType(CGF.getContext().getPointerDiffType()); - llvm::Type *DestLTy = CGF.ConvertType(DestTy); - - if (const PointerType *PTy = DestTy->getAs<PointerType>()) { - if (PTy->getPointeeType()->isVoidType()) { - // C++ [expr.dynamic.cast]p7: - // If T is "pointer to cv void," then the result is a pointer to the - // most derived object pointed to by v. - - // Get the vtable pointer. - llvm::Value *VTable = CGF.GetVTablePtr(Value, PtrDiffLTy->getPointerTo()); - - // Get the offset-to-top from the vtable. - llvm::Value *OffsetToTop = - CGF.Builder.CreateConstInBoundsGEP1_64(VTable, -2ULL); - OffsetToTop = CGF.Builder.CreateLoad(OffsetToTop, "offset.to.top"); - - // Finally, add the offset to the pointer. - Value = CGF.EmitCastToVoidPtr(Value); - Value = CGF.Builder.CreateInBoundsGEP(Value, OffsetToTop); - - return CGF.Builder.CreateBitCast(Value, DestLTy); - } - } - - QualType SrcRecordTy; - QualType DestRecordTy; - - if (const PointerType *DestPTy = DestTy->getAs<PointerType>()) { - SrcRecordTy = SrcTy->castAs<PointerType>()->getPointeeType(); - DestRecordTy = DestPTy->getPointeeType(); - } else { - SrcRecordTy = SrcTy; - DestRecordTy = DestTy->castAs<ReferenceType>()->getPointeeType(); - } - - assert(SrcRecordTy->isRecordType() && "source type must be a record type!"); - assert(DestRecordTy->isRecordType() && "dest type must be a record type!"); - - llvm::Value *SrcRTTI = - CGF.CGM.GetAddrOfRTTIDescriptor(SrcRecordTy.getUnqualifiedType()); - llvm::Value *DestRTTI = - CGF.CGM.GetAddrOfRTTIDescriptor(DestRecordTy.getUnqualifiedType()); - - // Compute the offset hint. - const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl(); - const CXXRecordDecl *DestDecl = DestRecordTy->getAsCXXRecordDecl(); - llvm::Value *OffsetHint = - llvm::ConstantInt::get(PtrDiffLTy, - computeOffsetHint(CGF.getContext(), SrcDecl, - DestDecl).getQuantity()); - - // Emit the call to __dynamic_cast. 
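// Recap (added for exposition, not part of this patch) of the src2dst_offset hint the
// removed code computed and that the __dynamic_cast call receives (Itanium C++ ABI 2.9.7):
//   >= 0 : Src is a unique public, non-virtual base of Dst at that offset
//    -1  : no hint (some inheritance path goes through a virtual base)
//    -2  : Src is not a public base of Dst
//    -3  : Src is a public base of Dst along more than one path
struct A { virtual ~A(); };
struct B : A { };               // casting an A* to a B* here uses hint 0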
- Value = CGF.EmitCastToVoidPtr(Value); - - llvm::Value *args[] = { Value, SrcRTTI, DestRTTI, OffsetHint }; - Value = CGF.EmitNounwindRuntimeCall(getDynamicCastFn(CGF), args); - Value = CGF.Builder.CreateBitCast(Value, DestLTy); - - /// C++ [expr.dynamic.cast]p9: - /// A failed cast to reference type throws std::bad_cast - if (DestTy->isReferenceType()) { - llvm::BasicBlock *BadCastBlock = - CGF.createBasicBlock("dynamic_cast.bad_cast"); - - llvm::Value *IsNull = CGF.Builder.CreateIsNull(Value); - CGF.Builder.CreateCondBr(IsNull, BadCastBlock, CastEnd); - - CGF.EmitBlock(BadCastBlock); - EmitBadCastCall(CGF); - } - - return Value; -} - static llvm::Value *EmitDynamicCastToNull(CodeGenFunction &CGF, QualType DestTy) { llvm::Type *DestLTy = CGF.ConvertType(DestTy); @@ -1800,7 +1711,8 @@ static llvm::Value *EmitDynamicCastToNull(CodeGenFunction &CGF, /// C++ [expr.dynamic.cast]p9: /// A failed cast to reference type throws std::bad_cast - EmitBadCastCall(CGF); + if (!CGF.CGM.getCXXABI().EmitBadCastCall(CGF)) + return nullptr; CGF.EmitBlock(CGF.createBasicBlock("dynamic_cast.end")); return llvm::UndefValue::get(DestLTy); @@ -1811,17 +1723,40 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(llvm::Value *Value, QualType DestTy = DCE->getTypeAsWritten(); if (DCE->isAlwaysNull()) - return EmitDynamicCastToNull(*this, DestTy); + if (llvm::Value *T = EmitDynamicCastToNull(*this, DestTy)) + return T; QualType SrcTy = DCE->getSubExpr()->getType(); + // C++ [expr.dynamic.cast]p7: + // If T is "pointer to cv void," then the result is a pointer to the most + // derived object pointed to by v. + const PointerType *DestPTy = DestTy->getAs<PointerType>(); + + bool isDynamicCastToVoid; + QualType SrcRecordTy; + QualType DestRecordTy; + if (DestPTy) { + isDynamicCastToVoid = DestPTy->getPointeeType()->isVoidType(); + SrcRecordTy = SrcTy->castAs<PointerType>()->getPointeeType(); + DestRecordTy = DestPTy->getPointeeType(); + } else { + isDynamicCastToVoid = false; + SrcRecordTy = SrcTy; + DestRecordTy = DestTy->castAs<ReferenceType>()->getPointeeType(); + } + + assert(SrcRecordTy->isRecordType() && "source type must be a record type!"); + // C++ [expr.dynamic.cast]p4: // If the value of v is a null pointer value in the pointer case, the result // is the null pointer value of type T. 
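// Illustration (added for exposition, not part of this patch): the pointer form tests
// the source for null and yields null without calling the runtime; the reference form
// has no null case and instead reaches the bad_cast path when the cast fails:
struct Base2 { virtual ~Base2(); };
struct Derived2 : Base2 { };
Derived2 *asDerived(Base2 *b) { return dynamic_cast<Derived2 *>(b); }     // null in, null out
Derived2 &asDerivedRef(Base2 &b) { return dynamic_cast<Derived2 &>(b); }  // throws std::bad_cast on failure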
- bool ShouldNullCheckSrcValue = SrcTy->isPointerType(); - - llvm::BasicBlock *CastNull = 0; - llvm::BasicBlock *CastNotNull = 0; + bool ShouldNullCheckSrcValue = + CGM.getCXXABI().shouldDynamicCastCallBeNullChecked(SrcTy->isPointerType(), + SrcRecordTy); + + llvm::BasicBlock *CastNull = nullptr; + llvm::BasicBlock *CastNotNull = nullptr; llvm::BasicBlock *CastEnd = createBasicBlock("dynamic_cast.end"); if (ShouldNullCheckSrcValue) { @@ -1833,7 +1768,15 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(llvm::Value *Value, EmitBlock(CastNotNull); } - Value = EmitDynamicCastCall(*this, Value, SrcTy, DestTy, CastEnd); + if (isDynamicCastToVoid) { + Value = CGM.getCXXABI().EmitDynamicCastToVoid(*this, Value, SrcRecordTy, + DestTy); + } else { + assert(DestRecordTy->isRecordType() && + "destination type must be a record type!"); + Value = CGM.getCXXABI().EmitDynamicCastCall(*this, Value, SrcRecordTy, + DestTy, DestRecordTy, CastEnd); + } if (ShouldNullCheckSrcValue) { EmitBranch(CastEnd); diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp index 73d5bcb13a95..7244b9e4d1eb 100644 --- a/lib/CodeGen/CGExprComplex.cpp +++ b/lib/CodeGen/CGExprComplex.cpp @@ -93,7 +93,7 @@ public: ComplexPairTy Visit(Expr *E) { return StmtVisitor<ComplexExprEmitter, ComplexPairTy>::Visit(E); } - + ComplexPairTy VisitStmt(Stmt *S) { S->dump(CGF.getContext().getSourceManager()); llvm_unreachable("Stmt can't have complex result type!"); @@ -306,7 +306,7 @@ ComplexPairTy ComplexExprEmitter::EmitLoadOfLValue(LValue lvalue, unsigned ComplexAlign = C.getTypeAlignInChars(ComplexTy).getQuantity(); unsigned AlignI = std::min(AlignR, ComplexAlign); - llvm::Value *Real=0, *Imag=0; + llvm::Value *Real=nullptr, *Imag=nullptr; if (!IgnoreReal || isVolatile) { llvm::Value *RealP = Builder.CreateStructGEP(SrcPtr, 0, @@ -410,7 +410,7 @@ ComplexPairTy ComplexExprEmitter::EmitScalarToComplexCast(llvm::Value *Val, return ComplexPairTy(Val, llvm::Constant::getNullValue(Val->getType())); } -ComplexPairTy ComplexExprEmitter::EmitCast(CastExpr::CastKind CK, Expr *Op, +ComplexPairTy ComplexExprEmitter::EmitCast(CastExpr::CastKind CK, Expr *Op, QualType DestTy) { switch (CK) { case CK_Dependent: llvm_unreachable("dependent cast kind in IR gen!"); @@ -427,7 +427,7 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastExpr::CastKind CK, Expr *Op, case CK_LValueBitCast: { LValue origLV = CGF.EmitLValue(Op); llvm::Value *V = origLV.getAddress(); - V = Builder.CreateBitCast(V, + V = Builder.CreateBitCast(V, CGF.ConvertType(CGF.getContext().getPointerType(DestTy))); return EmitLoadOfLValue(CGF.MakeAddrLValue(V, DestTy, origLV.getAlignment()), @@ -475,6 +475,7 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastExpr::CastKind CK, Expr *Op, case CK_CopyAndAutoreleaseBlockObject: case CK_BuiltinFnToFnPtr: case CK_ZeroToOCLEvent: + case CK_AddressSpaceConversion: llvm_unreachable("invalid cast kind for complex value"); case CK_FloatingRealToComplex: @@ -652,7 +653,7 @@ EmitCompoundAssignLValue(const CompoundAssignOperator *E, assert(CGF.getContext().hasSameUnqualifiedType(OpInfo.Ty, E->getRHS()->getType())); OpInfo.RHS = Visit(E->getRHS()); - + LValue LHS = CGF.EmitLValue(E->getLHS()); // Load from the l-value and convert it. 
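// Illustration (added for exposition, not part of this patch): in the compound
// assignment path above, the right-hand side is emitted first, then the left-hand
// l-value is loaded, converted to the computation type, combined, and stored back:
_Complex double accumulate(_Complex double c, double x) {
  c += x;   // emit 'x', then load 'c', add in the complex computation type, store to 'c'
  return c;
}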
@@ -702,7 +703,7 @@ EmitCompoundAssign(const CompoundAssignOperator *E, LValue ComplexExprEmitter::EmitBinAssignLValue(const BinaryOperator *E, ComplexPairTy &Val) { - assert(CGF.getContext().hasSameUnqualifiedType(E->getLHS()->getType(), + assert(CGF.getContext().hasSameUnqualifiedType(E->getLHS()->getType(), E->getRHS()->getType()) && "Invalid assignment"); TestAndClearIgnoreReal(); @@ -751,11 +752,13 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { // Bind the common expression if necessary. CodeGenFunction::OpaqueValueMapping binding(CGF, E); + RegionCounter Cnt = CGF.getPGORegionCounter(E); CodeGenFunction::ConditionalEvaluation eval(CGF); - CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock); + CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock, Cnt.getCount()); eval.begin(CGF); CGF.EmitBlock(LHSBlock); + Cnt.beginRegion(Builder); ComplexPairTy LHS = Visit(E->getTrueExpr()); LHSBlock = Builder.GetInsertBlock(); CGF.EmitBranch(ContBlock); diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp index f4d6861c8b8b..b508dcb446fb 100644 --- a/lib/CodeGen/CGExprConstant.cpp +++ b/lib/CodeGen/CGExprConstant.cpp @@ -530,7 +530,7 @@ llvm::Constant *ConstStructBuilder::BuildStruct(CodeGenModule &CGM, ConstStructBuilder Builder(CGM, CGF); if (!Builder.Build(ILE)) - return 0; + return nullptr; return Builder.Finalize(ILE->getType()); } @@ -572,7 +572,7 @@ public: //===--------------------------------------------------------------------===// llvm::Constant *VisitStmt(Stmt *S) { - return 0; + return nullptr; } llvm::Constant *VisitParenExpr(ParenExpr *PE) { @@ -599,7 +599,7 @@ public: llvm::Constant *VisitCastExpr(CastExpr* E) { Expr *subExpr = E->getSubExpr(); llvm::Constant *C = CGM.EmitConstantExpr(subExpr, subExpr->getType(), CGF); - if (!C) return 0; + if (!C) return nullptr; llvm::Type *destType = ConvertType(E->getType()); @@ -633,6 +633,9 @@ public: return llvm::ConstantStruct::get(STy, Elts); } + case CK_AddressSpaceConversion: + return llvm::ConstantExpr::getAddrSpaceCast(C, destType); + case CK_LValueToRValue: case CK_AtomicToNonAtomic: case CK_NonAtomicToAtomic: @@ -657,7 +660,7 @@ public: case CK_ARCReclaimReturnedObject: case CK_ARCExtendBlockObject: case CK_CopyAndAutoreleaseBlockObject: - return 0; + return nullptr; // These don't need to be handled here because Evaluate knows how to // evaluate them in the cases where they can be folded. @@ -698,7 +701,7 @@ public: case CK_FloatingToBoolean: case CK_FloatingCast: case CK_ZeroToOCLEvent: - return 0; + return nullptr; } llvm_unreachable("Invalid CastKind"); } @@ -740,7 +743,7 @@ public: Expr *Init = ILE->getInit(i); llvm::Constant *C = CGM.EmitConstantExpr(Init, Init->getType(), CGF); if (!C) - return 0; + return nullptr; RewriteType |= (C->getType() != ElemTy); Elts.push_back(C); } @@ -753,7 +756,7 @@ public: else fillC = llvm::Constant::getNullValue(ElemTy); if (!fillC) - return 0; + return nullptr; RewriteType |= (fillC->getType() != ElemTy); Elts.resize(NumElements, fillC); @@ -786,12 +789,12 @@ public: if (ILE->getType()->isRecordType()) return EmitRecordInitialization(ILE); - return 0; + return nullptr; } llvm::Constant *VisitCXXConstructExpr(CXXConstructExpr *E) { if (!E->getConstructor()->isTrivial()) - return 0; + return nullptr; QualType Ty = E->getType(); @@ -803,8 +806,8 @@ public: // If the class doesn't have a trivial destructor, we can't emit it as a // constant expr. 
if (!RD->hasTrivialDestructor()) - return 0; - + return nullptr; + // Only copy and default constructors can be trivial. @@ -833,7 +836,10 @@ public: // as an inline array. std::string Str; CGM.getContext().getObjCEncodingForType(E->getEncodedType(), Str); - const ConstantArrayType *CAT = cast<ConstantArrayType>(E->getType()); + QualType T = E->getType(); + if (T->getTypeClass() == Type::TypeOfExpr) + T = cast<TypeOfExprType>(T)->getUnderlyingExpr()->getType(); + const ConstantArrayType *CAT = cast<ConstantArrayType>(T); // Resize the string to the right size, adding zeros at the end, or // truncating as needed. @@ -866,7 +872,7 @@ public: return CGM.getStaticLocalDeclAddress(VD); } } - return 0; + return nullptr; } Expr *E = const_cast<Expr*>(LVBase.get<const Expr*>()); @@ -883,7 +889,7 @@ public: C = new llvm::GlobalVariable(CGM.getModule(), C->getType(), E->getType().isConstant(CGM.getContext()), llvm::GlobalValue::InternalLinkage, - C, ".compoundliteral", 0, + C, ".compoundliteral", nullptr, llvm::GlobalVariable::NotThreadLocal, CGM.getContext().getTargetAddressSpace(E->getType())); return C; @@ -917,7 +923,7 @@ public: } case Expr::CallExprClass: { CallExpr* CE = cast<CallExpr>(E); - unsigned builtin = CE->isBuiltinCall(); + unsigned builtin = CE->getBuiltinCallee(); if (builtin != Builtin::BI__builtin___CFStringMakeConstantString && builtin != @@ -964,7 +970,7 @@ public: } } - return 0; + return nullptr; } }; @@ -998,7 +1004,7 @@ llvm::Constant *CodeGenModule::EmitConstantInit(const VarDecl &D, // interprets that as the (pointer) value of the reference, rather than the // desired value of the referee. if (D.getType()->isReferenceType()) - return 0; + return nullptr; const Expr *E = D.getInit(); assert(E && "No initializer to emit"); @@ -1023,7 +1029,7 @@ llvm::Constant *CodeGenModule::EmitConstantExpr(const Expr *E, else Success = E->EvaluateAsRValue(Result, Context); - llvm::Constant *C = 0; + llvm::Constant *C = nullptr; if (Success && !Result.HasSideEffects) C = EmitConstantValue(Result.Val, DestType, CGF); else @@ -1039,6 +1045,25 @@ llvm::Constant *CodeGenModule::EmitConstantExpr(const Expr *E, llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, QualType DestType, CodeGenFunction *CGF) { + // For an _Atomic-qualified constant, we may need to add tail padding. + if (auto *AT = DestType->getAs<AtomicType>()) { + QualType InnerType = AT->getValueType(); + auto *Inner = EmitConstantValue(Value, InnerType, CGF); + + uint64_t InnerSize = Context.getTypeSize(InnerType); + uint64_t OuterSize = Context.getTypeSize(DestType); + if (InnerSize == OuterSize) + return Inner; + + assert(InnerSize < OuterSize && "emitted over-large constant for atomic"); + llvm::Constant *Elts[] = { + Inner, + llvm::ConstantAggregateZero::get( + llvm::ArrayType::get(Int8Ty, (OuterSize - InnerSize) / 8)) + }; + return llvm::ConstantStruct::getAnon(Elts); + } + switch (Value.getKind()) { case APValue::Uninitialized: llvm_unreachable("Constant expressions should be initialized."); @@ -1060,15 +1085,17 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, // Apply offset if necessary. 
if (!Offset->isNullValue()) { - llvm::Constant *Casted = llvm::ConstantExpr::getBitCast(C, Int8PtrTy); + unsigned AS = C->getType()->getPointerAddressSpace(); + llvm::Type *CharPtrTy = Int8Ty->getPointerTo(AS); + llvm::Constant *Casted = llvm::ConstantExpr::getBitCast(C, CharPtrTy); Casted = llvm::ConstantExpr::getGetElementPtr(Casted, Offset); - C = llvm::ConstantExpr::getBitCast(Casted, C->getType()); + C = llvm::ConstantExpr::getPointerCast(Casted, C->getType()); } // Convert to the appropriate type; this could be an lvalue for // an integer. if (isa<llvm::PointerType>(DestTy)) - return llvm::ConstantExpr::getBitCast(C, DestTy); + return llvm::ConstantExpr::getPointerCast(C, DestTy); return llvm::ConstantExpr::getPtrToInt(C, DestTy); } else { @@ -1166,13 +1193,13 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, Elts.reserve(NumElements); // Emit array filler, if there is one. - llvm::Constant *Filler = 0; + llvm::Constant *Filler = nullptr; if (Value.hasArrayFiller()) Filler = EmitConstantValueForMemory(Value.getArrayFiller(), CAT->getElementType(), CGF); // Emit initializer elements. - llvm::Type *CommonElementType = 0; + llvm::Type *CommonElementType = nullptr; for (unsigned I = 0; I < NumElements; ++I) { llvm::Constant *C = Filler; if (I < NumInitElts) @@ -1183,7 +1210,7 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, if (I == 0) CommonElementType = C->getType(); else if (C->getType() != CommonElementType) - CommonElementType = 0; + CommonElementType = nullptr; Elts.push_back(C); } @@ -1222,7 +1249,7 @@ CodeGenModule::EmitConstantValueForMemory(const APValue &Value, llvm::Constant * CodeGenModule::GetAddrOfConstantCompoundLiteral(const CompoundLiteralExpr *E) { assert(E->isFileScope() && "not a file-scope compound literal expr"); - return ConstExprEmitter(*this, 0).EmitLValue(E); + return ConstExprEmitter(*this, nullptr).EmitLValue(E); } llvm::Constant * @@ -1265,15 +1292,14 @@ FillInNullDataMemberPointers(CodeGenModule &CGM, QualType T, const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD); // Go through all bases and fill in any null pointer to data members. - for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(), - E = RD->bases_end(); I != E; ++I) { - if (I->isVirtual()) { + for (const auto &I : RD->bases()) { + if (I.isVirtual()) { // Ignore virtual bases. continue; } const CXXRecordDecl *BaseDecl = - cast<CXXRecordDecl>(I->getType()->getAs<RecordType>()->getDecl()); + cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl()); // Ignore empty bases. if (BaseDecl->isEmpty()) @@ -1285,7 +1311,7 @@ FillInNullDataMemberPointers(CodeGenModule &CGM, QualType T, uint64_t BaseOffset = CGM.getContext().toBits(Layout.getBaseClassOffset(BaseDecl)); - FillInNullDataMemberPointers(CGM, I->getType(), + FillInNullDataMemberPointers(CGM, I.getType(), Elements, StartOffset + BaseOffset); } @@ -1335,16 +1361,15 @@ static llvm::Constant *EmitNullConstant(CodeGenModule &CGM, std::vector<llvm::Constant *> elements(numElements); // Fill in all the bases. - for (CXXRecordDecl::base_class_const_iterator - I = record->bases_begin(), E = record->bases_end(); I != E; ++I) { - if (I->isVirtual()) { + for (const auto &I : record->bases()) { + if (I.isVirtual()) { // Ignore virtual bases; if we're laying out for a complete // object, we'll lay these out later. 
continue; } const CXXRecordDecl *base = - cast<CXXRecordDecl>(I->getType()->castAs<RecordType>()->getDecl()); + cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); // Ignore empty bases. if (base->isEmpty()) @@ -1356,28 +1381,24 @@ static llvm::Constant *EmitNullConstant(CodeGenModule &CGM, } // Fill in all the fields. - for (RecordDecl::field_iterator I = record->field_begin(), - E = record->field_end(); I != E; ++I) { - const FieldDecl *field = *I; - + for (const auto *Field : record->fields()) { // Fill in non-bitfields. (Bitfields always use a zero pattern, which we // will fill in later.) - if (!field->isBitField()) { - unsigned fieldIndex = layout.getLLVMFieldNo(field); - elements[fieldIndex] = CGM.EmitNullConstant(field->getType()); + if (!Field->isBitField()) { + unsigned fieldIndex = layout.getLLVMFieldNo(Field); + elements[fieldIndex] = CGM.EmitNullConstant(Field->getType()); } // For unions, stop after the first named field. - if (record->isUnion() && field->getDeclName()) + if (record->isUnion() && Field->getDeclName()) break; } // Fill in the virtual bases, if we're working with the complete object. if (asCompleteObject) { - for (CXXRecordDecl::base_class_const_iterator - I = record->vbases_begin(), E = record->vbases_end(); I != E; ++I) { + for (const auto &I : record->vbases()) { const CXXRecordDecl *base = - cast<CXXRecordDecl>(I->getType()->castAs<RecordType>()->getDecl()); + cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); // Ignore empty bases. if (base->isEmpty()) diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp index f3a5387c58d3..9e0fbcfd1844 100644 --- a/lib/CodeGen/CGExprScalar.cpp +++ b/lib/CodeGen/CGExprScalar.cpp @@ -22,13 +22,13 @@ #include "clang/AST/StmtVisitor.h" #include "clang/Basic/TargetInfo.h" #include "clang/Frontend/CodeGenOptions.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" -#include "llvm/Support/CFG.h" #include <cstdarg> using namespace clang; @@ -246,7 +246,7 @@ public: } Value *VisitObjCMessageExpr(ObjCMessageExpr *E) { if (E->getMethodDecl() && - E->getMethodDecl()->getResultType()->isReferenceType()) + E->getMethodDecl()->getReturnType()->isReferenceType()) return EmitLoadOfLValue(E); return CGF.EmitObjCMessageExpr(E).getScalarVal(); } @@ -365,13 +365,10 @@ public: } Value *VisitCXXDeleteExpr(const CXXDeleteExpr *E) { CGF.EmitCXXDeleteExpr(E); - return 0; - } - Value *VisitUnaryTypeTraitExpr(const UnaryTypeTraitExpr *E) { - return Builder.getInt1(E->getValue()); + return nullptr; } - Value *VisitBinaryTypeTraitExpr(const BinaryTypeTraitExpr *E) { + Value *VisitTypeTraitExpr(const TypeTraitExpr *E) { return llvm::ConstantInt::get(ConvertType(E->getType()), E->getValue()); } @@ -390,7 +387,7 @@ public: // effect is the evaluation of the postfix-expression before the dot or // arrow. 
CGF.EmitScalarExpr(E->getBase()); - return 0; + return nullptr; } Value *VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *E) { @@ -399,7 +396,7 @@ public: Value *VisitCXXThrowExpr(const CXXThrowExpr *E) { CGF.EmitCXXThrowExpr(E); - return 0; + return nullptr; } Value *VisitCXXNoexceptExpr(const CXXNoexceptExpr *E) { @@ -557,12 +554,13 @@ void ScalarExprEmitter::EmitFloatConversionCheck(Value *OrigSrc, Value *Src, QualType SrcType, QualType DstType, llvm::Type *DstTy) { + CodeGenFunction::SanitizerScope SanScope(&CGF); using llvm::APFloat; using llvm::APSInt; llvm::Type *SrcTy = Src->getType(); - llvm::Value *Check = 0; + llvm::Value *Check = nullptr; if (llvm::IntegerType *IntTy = dyn_cast<llvm::IntegerType>(SrcTy)) { // Integer to floating-point. This can fail for unsigned short -> __half // or unsigned __int128 -> float. @@ -696,7 +694,7 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, DstType = CGF.getContext().getCanonicalType(DstType); if (SrcType == DstType) return Src; - if (DstType->isVoidType()) return 0; + if (DstType->isVoidType()) return nullptr; llvm::Value *OrigSrc = Src; QualType OrigSrcType = SrcType; @@ -704,7 +702,10 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, // If casting to/from storage-only half FP, use special intrinsics. if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) { - Src = Builder.CreateCall(CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16), Src); + Src = Builder.CreateCall( + CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, + CGF.CGM.FloatTy), + Src); SrcType = CGF.getContext().FloatTy; SrcTy = CGF.FloatTy; } @@ -761,7 +762,7 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, return Builder.CreateBitCast(Src, DstTy, "conv"); // Finally, we have the arithmetic types: real int/float. - Value *Res = NULL; + Value *Res = nullptr; llvm::Type *ResTy = DstTy; // An overflowing conversion has undefined behavior if either the source type @@ -800,7 +801,9 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, if (DstTy != ResTy) { assert(ResTy->isIntegerTy(16) && "Only half FP requires extra conversion"); - Res = Builder.CreateCall(CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16), Res); + Res = Builder.CreateCall( + CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, CGF.CGM.FloatTy), + Res); } return Res; @@ -838,6 +841,7 @@ Value *ScalarExprEmitter::EmitNullValue(QualType Ty) { /// might actually be a unary increment which has been lowered to a binary /// operation). The check passes if \p Check, which is an \c i1, is \c true. 
void ScalarExprEmitter::EmitBinOpCheck(Value *Check, const BinOpInfo &Info) { + assert(CGF.IsSanitizerScope); StringRef CheckName; SmallVector<llvm::Constant *, 4> StaticData; SmallVector<llvm::Value *, 2> DynamicData; @@ -890,7 +894,7 @@ void ScalarExprEmitter::EmitBinOpCheck(Value *Check, const BinOpInfo &Info) { Value *ScalarExprEmitter::VisitExpr(Expr *E) { CGF.ErrorUnsupported(E, "scalar expression"); if (E->getType()->isVoidType()) - return 0; + return nullptr; return llvm::UndefValue::get(CGF.ConvertType(E->getType())); } @@ -943,9 +947,8 @@ Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) { MTy->getNumElements()); Value* NewV = llvm::UndefValue::get(RTy); for (unsigned i = 0, e = MTy->getNumElements(); i != e; ++i) { - Value *IIndx = Builder.getInt32(i); + Value *IIndx = llvm::ConstantInt::get(CGF.SizeTy, i); Value *Indx = Builder.CreateExtractElement(Mask, IIndx, "shuf_idx"); - Indx = Builder.CreateZExt(Indx, CGF.Int32Ty, "idx_zext"); Value *VExt = Builder.CreateExtractElement(LHS, Indx, "shuf_elt"); NewV = Builder.CreateInsertElement(NewV, VExt, IIndx, "shuf_ins"); @@ -1016,7 +1019,7 @@ Value *ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) { } // We have the arithmetic types: real int/float. - Value *Res = NULL; + Value *Res = nullptr; if (isa<llvm::IntegerType>(SrcEltTy)) { bool InputSigned = SrcEltType->isSignedIntegerOrEnumerationType(); @@ -1076,8 +1079,6 @@ Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) { if (CGF.SanOpts->ArrayBounds) CGF.EmitBoundsCheck(E, E->getBase(), Idx, IdxTy, /*Accessed*/true); - bool IdxSigned = IdxTy->isSignedIntegerOrEnumerationType(); - Idx = Builder.CreateIntCast(Idx, CGF.Int32Ty, IdxSigned, "vecidxcast"); return Builder.CreateExtractElement(Base, Idx, "vecext"); } @@ -1136,7 +1137,7 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) { if (EI->getVectorOperandType()->getNumElements() == ResElts) { llvm::ConstantInt *C = cast<llvm::ConstantInt>(EI->getIndexOperand()); - Value *LHS = 0, *RHS = 0; + Value *LHS = nullptr, *RHS = nullptr; if (CurIdx == 0) { // insert into undef -> shuffle (src, undef) Args.push_back(C); @@ -1299,7 +1300,18 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { case CK_AnyPointerToBlockPointerCast: case CK_BitCast: { Value *Src = Visit(const_cast<Expr*>(E)); - return Builder.CreateBitCast(Src, ConvertType(DestTy)); + llvm::Type *SrcTy = Src->getType(); + llvm::Type *DstTy = ConvertType(DestTy); + if (SrcTy->isPtrOrPtrVectorTy() && DstTy->isPtrOrPtrVectorTy() && + SrcTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace()) { + llvm::Type *MidTy = CGF.CGM.getDataLayout().getIntPtrType(SrcTy); + return Builder.CreateIntToPtr(Builder.CreatePtrToInt(Src, MidTy), DstTy); + } + return Builder.CreateBitCast(Src, DstTy); + } + case CK_AddressSpaceConversion: { + Value *Src = Visit(const_cast<Expr*>(E)); + return Builder.CreateAddrSpaceCast(Src, ConvertType(DestTy)); } case CK_AtomicToNonAtomic: case CK_NonAtomicToAtomic: @@ -1320,7 +1332,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { // C++11 [expr.static.cast]p11: Behavior is undefined if a downcast is // performed and the object is not of the derived type. - if (CGF.SanitizePerformTypeCheck) + if (CGF.sanitizePerformTypeCheck()) CGF.EmitTypeCheck(CodeGenFunction::TCK_DowncastPointer, CE->getExprLoc(), Derived, DestTy->getPointeeType()); @@ -1360,7 +1372,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { // Make sure the array decay ends up being the right type. 
This matters if // the array type was of an incomplete type. - return CGF.Builder.CreateBitCast(V, ConvertType(CE->getType())); + return CGF.Builder.CreatePointerCast(V, ConvertType(CE->getType())); } case CK_FunctionToPointerDecay: return EmitLValue(E).getAddress(); @@ -1442,7 +1454,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { case CK_ToVoid: { CGF.EmitIgnoredExpr(E); - return 0; + return nullptr; } case CK_VectorSplat: { llvm::Type *DstTy = ConvertType(DestTy); @@ -1452,7 +1464,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { // Splat the element across to all elements unsigned NumElements = cast<llvm::VectorType>(DstTy)->getNumElements(); - return Builder.CreateVectorSplat(NumElements, Elt, "splat");; + return Builder.CreateVectorSplat(NumElements, Elt, "splat"); } case CK_IntegralCast: @@ -1485,7 +1497,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { } case CK_ZeroToOCLEvent: { - assert(DestTy->isEventT() && "CK_ZeroToOCLEvent cast on non event type"); + assert(DestTy->isEventT() && "CK_ZeroToOCLEvent cast on non-event type"); return llvm::Constant::getNullValue(ConvertType(DestTy)); } @@ -1499,7 +1511,7 @@ Value *ScalarExprEmitter::VisitStmtExpr(const StmtExpr *E) { llvm::Value *RetAlloca = CGF.EmitCompoundStmt(*E->getSubStmt(), !E->getType()->isVoidType()); if (!RetAlloca) - return 0; + return nullptr; return CGF.EmitLoadOfScalar(CGF.MakeAddrLValue(RetAlloca, E->getType()), E->getExprLoc()); } @@ -1537,7 +1549,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, bool isInc, bool isPre) { QualType type = E->getSubExpr()->getType(); - llvm::PHINode *atomicPHI = 0; + llvm::PHINode *atomicPHI = nullptr; llvm::Value *value; llvm::Value *input; @@ -1610,12 +1622,11 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, // Note that signed integer inc/dec with width less than int can't // overflow because of promotion rules; we're just eliding a few steps here. - if (value->getType()->getPrimitiveSizeInBits() >= - CGF.IntTy->getBitWidth() && - type->isSignedIntegerOrEnumerationType()) { + bool CanOverflow = value->getType()->getIntegerBitWidth() >= + CGF.IntTy->getIntegerBitWidth(); + if (CanOverflow && type->isSignedIntegerOrEnumerationType()) { value = EmitAddConsiderOverflowBehavior(E, value, amt, isInc); - } else if (value->getType()->getPrimitiveSizeInBits() >= - CGF.IntTy->getBitWidth() && type->isUnsignedIntegerType() && + } else if (CanOverflow && type->isUnsignedIntegerType() && CGF.SanOpts->UnsignedIntegerOverflow) { BinOpInfo BinOp; BinOp.LHS = value; @@ -1682,9 +1693,10 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) { // Another special case: half FP increment should be done via float - value = - Builder.CreateCall(CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16), - input); + value = Builder.CreateCall( + CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, + CGF.CGM.FloatTy), + input); } if (value->getType()->isFloatTy()) @@ -1703,9 +1715,10 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, value = Builder.CreateFAdd(value, amt, isInc ? 
"inc" : "dec"); if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) - value = - Builder.CreateCall(CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16), - value); + value = Builder.CreateCall( + CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, + CGF.CGM.FloatTy), + value); // Objective-C pointer types. } else { @@ -1727,10 +1740,12 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (atomicPHI) { llvm::BasicBlock *opBB = Builder.GetInsertBlock(); llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn); - llvm::Value *old = Builder.CreateAtomicCmpXchg(LV.getAddress(), atomicPHI, - CGF.EmitToMemory(value, type), llvm::SequentiallyConsistent); + llvm::Value *pair = Builder.CreateAtomicCmpXchg( + LV.getAddress(), atomicPHI, CGF.EmitToMemory(value, type), + llvm::SequentiallyConsistent, llvm::SequentiallyConsistent); + llvm::Value *old = Builder.CreateExtractValue(pair, 0); + llvm::Value *success = Builder.CreateExtractValue(pair, 1); atomicPHI->addIncoming(old, opBB); - llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI); Builder.CreateCondBr(success, contBB, opBB); Builder.SetInsertPoint(contBB); return isPre ? value : input; @@ -1810,7 +1825,7 @@ Value *ScalarExprEmitter::VisitOffsetOfExpr(OffsetOfExpr *E) { QualType CurrentType = E->getTypeSourceInfo()->getType(); for (unsigned i = 0; i != n; ++i) { OffsetOfExpr::OffsetOfNode ON = E->getComponent(i); - llvm::Value *Offset = 0; + llvm::Value *Offset = nullptr; switch (ON.getKind()) { case OffsetOfExpr::OffsetOfNode::Array: { // Compute the index @@ -1906,7 +1921,7 @@ ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr( QualType eltType; llvm::Value *numElts; - llvm::tie(numElts, eltType) = CGF.getVLASize(VAT); + std::tie(numElts, eltType) = CGF.getVLASize(VAT); llvm::Value *size = numElts; @@ -2000,7 +2015,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( // Load/convert the LHS. LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); - llvm::PHINode *atomicPHI = 0; + llvm::PHINode *atomicPHI = nullptr; if (const AtomicType *atomicTy = LHSTy->getAs<AtomicType>()) { QualType type = atomicTy->getValueType(); if (!type->isBooleanType() && type->isIntegerType() && @@ -2069,10 +2084,12 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( if (atomicPHI) { llvm::BasicBlock *opBB = Builder.GetInsertBlock(); llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn); - llvm::Value *old = Builder.CreateAtomicCmpXchg(LHSLV.getAddress(), atomicPHI, - CGF.EmitToMemory(Result, LHSTy), llvm::SequentiallyConsistent); + llvm::Value *pair = Builder.CreateAtomicCmpXchg( + LHSLV.getAddress(), atomicPHI, CGF.EmitToMemory(Result, LHSTy), + llvm::SequentiallyConsistent, llvm::SequentiallyConsistent); + llvm::Value *old = Builder.CreateExtractValue(pair, 0); + llvm::Value *success = Builder.CreateExtractValue(pair, 1); atomicPHI->addIncoming(old, opBB); - llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI); Builder.CreateCondBr(success, contBB, opBB); Builder.SetInsertPoint(contBB); return LHSLV; @@ -2098,7 +2115,7 @@ Value *ScalarExprEmitter::EmitCompoundAssign(const CompoundAssignOperator *E, // If the result is clearly ignored, return now. if (Ignore) - return 0; + return nullptr; // The result of an assignment in C is the assigned r-value. 
if (!CGF.getLangOpts().CPlusPlus) @@ -2114,7 +2131,7 @@ Value *ScalarExprEmitter::EmitCompoundAssign(const CompoundAssignOperator *E, void ScalarExprEmitter::EmitUndefinedBehaviorIntegerDivAndRemCheck( const BinOpInfo &Ops, llvm::Value *Zero, bool isDiv) { - llvm::Value *Cond = 0; + llvm::Value *Cond = nullptr; if (CGF.SanOpts->IntegerDivideByZero) Cond = Builder.CreateICmpNE(Ops.RHS, Zero); @@ -2138,15 +2155,18 @@ void ScalarExprEmitter::EmitUndefinedBehaviorIntegerDivAndRemCheck( } Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) { - if ((CGF.SanOpts->IntegerDivideByZero || - CGF.SanOpts->SignedIntegerOverflow) && - Ops.Ty->isIntegerType()) { - llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty)); - EmitUndefinedBehaviorIntegerDivAndRemCheck(Ops, Zero, true); - } else if (CGF.SanOpts->FloatDivideByZero && - Ops.Ty->isRealFloatingType()) { - llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty)); - EmitBinOpCheck(Builder.CreateFCmpUNE(Ops.RHS, Zero), Ops); + { + CodeGenFunction::SanitizerScope SanScope(&CGF); + if ((CGF.SanOpts->IntegerDivideByZero || + CGF.SanOpts->SignedIntegerOverflow) && + Ops.Ty->isIntegerType()) { + llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty)); + EmitUndefinedBehaviorIntegerDivAndRemCheck(Ops, Zero, true); + } else if (CGF.SanOpts->FloatDivideByZero && + Ops.Ty->isRealFloatingType()) { + llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty)); + EmitBinOpCheck(Builder.CreateFCmpUNE(Ops.RHS, Zero), Ops); + } } if (Ops.LHS->getType()->isFPOrFPVectorTy()) { @@ -2170,6 +2190,7 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) { Value *ScalarExprEmitter::EmitRem(const BinOpInfo &Ops) { // Rem in C can't be a floating point type: C99 6.5.5p2. if (CGF.SanOpts->IntegerDivideByZero) { + CodeGenFunction::SanitizerScope SanScope(&CGF); llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty)); if (Ops.Ty->isIntegerType()) @@ -2227,9 +2248,10 @@ Value *ScalarExprEmitter::EmitOverflowCheckedBinOp(const BinOpInfo &Ops) { if (handlerName->empty()) { // If the signed-integer-overflow sanitizer is enabled, emit a call to its // runtime. Otherwise, this is a -ftrapv check, so just emit a trap. - if (!isSigned || CGF.SanOpts->SignedIntegerOverflow) + if (!isSigned || CGF.SanOpts->SignedIntegerOverflow) { + CodeGenFunction::SanitizerScope SanScope(&CGF); EmitBinOpCheck(Builder.CreateNot(overflow), Ops); - else + } else CGF.EmitTrapCheck(Builder.CreateNot(overflow)); return result; } @@ -2238,7 +2260,7 @@ Value *ScalarExprEmitter::EmitOverflowCheckedBinOp(const BinOpInfo &Ops) { llvm::BasicBlock *initialBB = Builder.GetInsertBlock(); llvm::Function::iterator insertPt = initialBB; llvm::BasicBlock *continueBB = CGF.createBasicBlock("nooverflow", CGF.CurFn, - llvm::next(insertPt)); + std::next(insertPt)); llvm::BasicBlock *overflowBB = CGF.createBasicBlock("overflow", CGF.CurFn); Builder.CreateCondBr(overflow, overflowBB, continueBB); @@ -2418,12 +2440,12 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op, // Check whether this op is marked as fusable. if (!op.FPContractable) - return 0; + return nullptr; // Check whether -ffp-contract=on. (If -ffp-contract=off/fast, fusing is // either disabled, or handled entirely by the LLVM backend). if (CGF.CGM.getCodeGenOpts().getFPContractMode() != CodeGenOptions::FPC_On) - return 0; + return nullptr; // We have a potentially fusable op. Look for a mul on one of the operands. 
if (llvm::BinaryOperator* LHSBinOp = dyn_cast<llvm::BinaryOperator>(op.LHS)) { @@ -2441,7 +2463,7 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op, } } - return 0; + return nullptr; } Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { @@ -2523,13 +2545,13 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { const BinaryOperator *expr = cast<BinaryOperator>(op.E); QualType elementType = expr->getLHS()->getType()->getPointeeType(); - llvm::Value *divisor = 0; + llvm::Value *divisor = nullptr; // For a variable-length array, this is going to be non-constant. if (const VariableArrayType *vla = CGF.getContext().getAsVariableArrayType(elementType)) { llvm::Value *numElements; - llvm::tie(numElements, elementType) = CGF.getVLASize(vla); + std::tie(numElements, elementType) = CGF.getVLASize(vla); divisor = numElements; @@ -2581,6 +2603,7 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { if (CGF.SanOpts->Shift && !CGF.getLangOpts().OpenCL && isa<llvm::IntegerType>(Ops.LHS->getType())) { + CodeGenFunction::SanitizerScope SanScope(&CGF); llvm::Value *WidthMinusOne = GetWidthMinusOneValue(Ops.LHS, RHS); llvm::Value *Valid = Builder.CreateICmpULE(RHS, WidthMinusOne); @@ -2632,8 +2655,10 @@ Value *ScalarExprEmitter::EmitShr(const BinOpInfo &Ops) { RHS = Builder.CreateIntCast(RHS, Ops.LHS->getType(), false, "sh_prom"); if (CGF.SanOpts->Shift && !CGF.getLangOpts().OpenCL && - isa<llvm::IntegerType>(Ops.LHS->getType())) + isa<llvm::IntegerType>(Ops.LHS->getType())) { + CodeGenFunction::SanitizerScope SanScope(&CGF); EmitBinOpCheck(Builder.CreateICmpULE(RHS, GetWidthMinusOneValue(Ops.LHS, RHS)), Ops); + } // OpenCL 6.3j: shift values are effectively % word size of LHS. if (CGF.getLangOpts().OpenCL) @@ -2818,11 +2843,11 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { switch (E->getLHS()->getType().getObjCLifetime()) { case Qualifiers::OCL_Strong: - llvm::tie(LHS, RHS) = CGF.EmitARCStoreStrong(E, Ignore); + std::tie(LHS, RHS) = CGF.EmitARCStoreStrong(E, Ignore); break; case Qualifiers::OCL_Autoreleasing: - llvm::tie(LHS,RHS) = CGF.EmitARCStoreAutoreleasing(E); + std::tie(LHS, RHS) = CGF.EmitARCStoreAutoreleasing(E); break; case Qualifiers::OCL_Weak: @@ -2851,7 +2876,7 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { // If the result is clearly ignored, return now. if (Ignore) - return 0; + return nullptr; // The result of an assignment in C is the assigned r-value. if (!CGF.getLangOpts().CPlusPlus) @@ -2866,8 +2891,12 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { } Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { + RegionCounter Cnt = CGF.getPGORegionCounter(E); + // Perform vector logical and on comparisons with zero vectors. if (E->getType()->isVectorType()) { + Cnt.beginRegion(Builder); + Value *LHS = Visit(E->getLHS()); Value *RHS = Visit(E->getRHS()); Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType()); @@ -2889,6 +2918,8 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { bool LHSCondVal; if (CGF.ConstantFoldsToSimpleInteger(E->getLHS(), LHSCondVal)) { if (LHSCondVal) { // If we have 1 && X, just emit X. + Cnt.beginRegion(Builder); + Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS()); // ZExt result to int or bool. return Builder.CreateZExtOrBitCast(RHSCond, ResTy, "land.ext"); @@ -2905,7 +2936,7 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { CodeGenFunction::ConditionalEvaluation eval(CGF); // Branch on the LHS first. 
If it is false, go to the failure (cont) block. - CGF.EmitBranchOnBoolExpr(E->getLHS(), RHSBlock, ContBlock); + CGF.EmitBranchOnBoolExpr(E->getLHS(), RHSBlock, ContBlock, Cnt.getCount()); // Any edges into the ContBlock are now from an (indeterminate number of) // edges from this first condition. All of these values will be false. Start @@ -2918,18 +2949,20 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { eval.begin(CGF); CGF.EmitBlock(RHSBlock); + Cnt.beginRegion(Builder); Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS()); eval.end(CGF); // Reaquire the RHS block, as there may be subblocks inserted. RHSBlock = Builder.GetInsertBlock(); - // Emit an unconditional branch from this block to ContBlock. Insert an entry - // into the phi node for the edge with the value of RHSCond. - if (CGF.getDebugInfo()) + // Emit an unconditional branch from this block to ContBlock. + { // There is no need to emit line number for unconditional branch. - Builder.SetCurrentDebugLocation(llvm::DebugLoc()); - CGF.EmitBlock(ContBlock); + SuppressDebugLocation S(Builder); + CGF.EmitBlock(ContBlock); + } + // Insert an entry into the phi node for the edge with the value of RHSCond. PN->addIncoming(RHSCond, RHSBlock); // ZExt result to int. @@ -2937,8 +2970,12 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { } Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) { + RegionCounter Cnt = CGF.getPGORegionCounter(E); + // Perform vector logical or on comparisons with zero vectors. if (E->getType()->isVectorType()) { + Cnt.beginRegion(Builder); + Value *LHS = Visit(E->getLHS()); Value *RHS = Visit(E->getRHS()); Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType()); @@ -2960,6 +2997,8 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) { bool LHSCondVal; if (CGF.ConstantFoldsToSimpleInteger(E->getLHS(), LHSCondVal)) { if (!LHSCondVal) { // If we have 0 || X, just emit X. + Cnt.beginRegion(Builder); + Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS()); // ZExt result to int or bool. return Builder.CreateZExtOrBitCast(RHSCond, ResTy, "lor.ext"); @@ -2976,7 +3015,8 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) { CodeGenFunction::ConditionalEvaluation eval(CGF); // Branch on the LHS first. If it is true, go to the success (cont) block. - CGF.EmitBranchOnBoolExpr(E->getLHS(), ContBlock, RHSBlock); + CGF.EmitBranchOnBoolExpr(E->getLHS(), ContBlock, RHSBlock, + Cnt.getParentCount() - Cnt.getCount()); // Any edges into the ContBlock are now from an (indeterminate number of) // edges from this first condition. All of these values will be true. Start @@ -2991,6 +3031,7 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) { // Emit the RHS condition as a bool value. CGF.EmitBlock(RHSBlock); + Cnt.beginRegion(Builder); Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS()); eval.end(CGF); @@ -3041,6 +3082,7 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { // Bind the common expression if necessary. CodeGenFunction::OpaqueValueMapping binding(CGF, E); + RegionCounter Cnt = CGF.getPGORegionCounter(E); Expr *condExpr = E->getCond(); Expr *lhsExpr = E->getTrueExpr(); @@ -3055,6 +3097,8 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { // If the dead side doesn't have labels we need, just emit the Live part. 
if (!CGF.ContainsLabel(dead)) { + if (CondExprBool) + Cnt.beginRegion(Builder); Value *Result = Visit(live); // If the live part is a throw expression, it acts like it has a void @@ -3071,6 +3115,8 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { // the select function. if (CGF.getLangOpts().OpenCL && condExpr->getType()->isVectorType()) { + Cnt.beginRegion(Builder); + llvm::Value *CondV = CGF.EmitScalarExpr(condExpr); llvm::Value *LHS = Visit(lhsExpr); llvm::Value *RHS = Visit(rhsExpr); @@ -3114,13 +3160,15 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { // safe to evaluate the LHS and RHS unconditionally. if (isCheapEnoughToEvaluateUnconditionally(lhsExpr, CGF) && isCheapEnoughToEvaluateUnconditionally(rhsExpr, CGF)) { + Cnt.beginRegion(Builder); + llvm::Value *CondV = CGF.EvaluateExprAsBool(condExpr); llvm::Value *LHS = Visit(lhsExpr); llvm::Value *RHS = Visit(rhsExpr); if (!LHS) { // If the conditional has void type, make sure we return a null Value*. assert(!RHS && "LHS and RHS types must match"); - return 0; + return nullptr; } return Builder.CreateSelect(CondV, LHS, RHS, "cond"); } @@ -3130,9 +3178,10 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { llvm::BasicBlock *ContBlock = CGF.createBasicBlock("cond.end"); CodeGenFunction::ConditionalEvaluation eval(CGF); - CGF.EmitBranchOnBoolExpr(condExpr, LHSBlock, RHSBlock); + CGF.EmitBranchOnBoolExpr(condExpr, LHSBlock, RHSBlock, Cnt.getCount()); CGF.EmitBlock(LHSBlock); + Cnt.beginRegion(Builder); eval.begin(CGF); Value *LHS = Visit(lhsExpr); eval.end(CGF); @@ -3166,6 +3215,10 @@ Value *ScalarExprEmitter::VisitChooseExpr(ChooseExpr *E) { } Value *ScalarExprEmitter::VisitVAArgExpr(VAArgExpr *VE) { + QualType Ty = VE->getType(); + if (Ty->isVariablyModifiedType()) + CGF.EmitVariablyModifiedType(Ty); + llvm::Value *ArgValue = CGF.EmitVAListRef(VE->getSubExpr()); llvm::Value *ArgPtr = CGF.EmitVAArg(ArgValue, VE->getType()); @@ -3312,7 +3365,7 @@ LValue CodeGenFunction::EmitObjCIsaExpr(const ObjCIsaExpr *E) { LValue CodeGenFunction::EmitCompoundAssignmentLValue( const CompoundAssignOperator *E) { ScalarExprEmitter Scalar(*this); - Value *Result = 0; + Value *Result = nullptr; switch (E->getOpcode()) { #define COMPOUND_OP(Op) \ case BO_##Op##Assign: \ diff --git a/lib/CodeGen/CGLoopInfo.cpp b/lib/CodeGen/CGLoopInfo.cpp new file mode 100644 index 000000000000..a273f1d4dda8 --- /dev/null +++ b/lib/CodeGen/CGLoopInfo.cpp @@ -0,0 +1,112 @@ +//===---- CGLoopInfo.cpp - LLVM CodeGen for loop metadata -*- C++ -*-------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "CGLoopInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" +using namespace clang; +using namespace CodeGen; +using namespace llvm; + +static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) { + + if (!Attrs.IsParallel && Attrs.VectorizerWidth == 0 && + Attrs.VectorizerUnroll == 0 && + Attrs.VectorizerEnable == LoopAttributes::VecUnspecified) + return nullptr; + + SmallVector<Value *, 4> Args; + // Reserve operand 0 for loop id self reference. 
+ MDNode *TempNode = MDNode::getTemporary(Ctx, None); + Args.push_back(TempNode); + + // Setting vectorizer.width + if (Attrs.VectorizerWidth > 0) { + Value *Vals[] = { MDString::get(Ctx, "llvm.loop.vectorize.width"), + ConstantInt::get(Type::getInt32Ty(Ctx), + Attrs.VectorizerWidth) }; + Args.push_back(MDNode::get(Ctx, Vals)); + } + + // Setting vectorizer.unroll + if (Attrs.VectorizerUnroll > 0) { + Value *Vals[] = { MDString::get(Ctx, "llvm.loop.interleave.count"), + ConstantInt::get(Type::getInt32Ty(Ctx), + Attrs.VectorizerUnroll) }; + Args.push_back(MDNode::get(Ctx, Vals)); + } + + // Setting vectorizer.enable + if (Attrs.VectorizerEnable != LoopAttributes::VecUnspecified) { + Value *Vals[] = { MDString::get(Ctx, "llvm.loop.vectorize.enable"), + ConstantInt::get(Type::getInt1Ty(Ctx), + (Attrs.VectorizerEnable == + LoopAttributes::VecEnable)) }; + Args.push_back(MDNode::get(Ctx, Vals)); + } + + MDNode *LoopID = MDNode::get(Ctx, Args); + assert(LoopID->use_empty() && "LoopID should not be used"); + + // Set the first operand to itself. + LoopID->replaceOperandWith(0, LoopID); + MDNode::deleteTemporary(TempNode); + return LoopID; +} + +LoopAttributes::LoopAttributes(bool IsParallel) + : IsParallel(IsParallel), VectorizerEnable(LoopAttributes::VecUnspecified), + VectorizerWidth(0), VectorizerUnroll(0) {} + +void LoopAttributes::clear() { + IsParallel = false; + VectorizerWidth = 0; + VectorizerUnroll = 0; + VectorizerEnable = LoopAttributes::VecUnspecified; +} + +LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs) + : LoopID(nullptr), Header(Header), Attrs(Attrs) { + LoopID = createMetadata(Header->getContext(), Attrs); +} + +void LoopInfoStack::push(BasicBlock *Header) { + Active.push_back(LoopInfo(Header, StagedAttrs)); + // Clear the attributes so nested loops do not inherit them. + StagedAttrs.clear(); +} + +void LoopInfoStack::pop() { + assert(!Active.empty() && "No active loops to pop"); + Active.pop_back(); +} + +void LoopInfoStack::InsertHelper(Instruction *I) const { + if (!hasInfo()) + return; + + const LoopInfo &L = getInfo(); + if (!L.getLoopID()) + return; + + if (TerminatorInst *TI = dyn_cast<TerminatorInst>(I)) { + for (unsigned i = 0, ie = TI->getNumSuccessors(); i < ie; ++i) + if (TI->getSuccessor(i) == L.getHeader()) { + TI->setMetadata("llvm.loop", L.getLoopID()); + break; + } + return; + } + + if (L.getAttributes().IsParallel && I->mayReadOrWriteMemory()) + I->setMetadata("llvm.mem.parallel_loop_access", L.getLoopID()); +} diff --git a/lib/CodeGen/CGLoopInfo.h b/lib/CodeGen/CGLoopInfo.h new file mode 100644 index 000000000000..2f6f172e047a --- /dev/null +++ b/lib/CodeGen/CGLoopInfo.h @@ -0,0 +1,136 @@ +//===---- CGLoopInfo.h - LLVM CodeGen for loop metadata -*- C++ -*---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the internal state used for llvm translation for loop statement +// metadata. 
+// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_CODEGEN_CGLOOPINFO_H +#define CLANG_CODEGEN_CGLOOPINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +class BasicBlock; +class Instruction; +class MDNode; +} // end namespace llvm + +namespace clang { +namespace CodeGen { + +/// \brief Attributes that may be specified on loops. +struct LoopAttributes { + explicit LoopAttributes(bool IsParallel = false); + void clear(); + + /// \brief Generate llvm.loop.parallel metadata for loads and stores. + bool IsParallel; + + /// \brief Values of llvm.loop.vectorize.enable metadata. + enum LVEnableState { VecUnspecified, VecEnable, VecDisable }; + + /// \brief llvm.loop.vectorize.enable + LVEnableState VectorizerEnable; + + /// \brief llvm.loop.vectorize.width + unsigned VectorizerWidth; + + /// \brief llvm.loop.interleave.count + unsigned VectorizerUnroll; +}; + +/// \brief Information used when generating a structured loop. +class LoopInfo { +public: + /// \brief Construct a new LoopInfo for the loop with entry Header. + LoopInfo(llvm::BasicBlock *Header, const LoopAttributes &Attrs); + + /// \brief Get the loop id metadata for this loop. + llvm::MDNode *getLoopID() const { return LoopID; } + + /// \brief Get the header block of this loop. + llvm::BasicBlock *getHeader() const { return Header; } + + /// \brief Get the set of attributes active for this loop. + const LoopAttributes &getAttributes() const { return Attrs; } + +private: + /// \brief Loop ID metadata. + llvm::MDNode *LoopID; + /// \brief Header block of this loop. + llvm::BasicBlock *Header; + /// \brief The attributes for this loop. + LoopAttributes Attrs; +}; + +/// \brief A stack of loop information corresponding to loop nesting levels. +/// This stack can be used to prepare attributes which are applied when a loop +/// is emitted. +class LoopInfoStack { + LoopInfoStack(const LoopInfoStack &) LLVM_DELETED_FUNCTION; + void operator=(const LoopInfoStack &) LLVM_DELETED_FUNCTION; + +public: + LoopInfoStack() {} + + /// \brief Begin a new structured loop. The set of staged attributes will be + /// applied to the loop and then cleared. + void push(llvm::BasicBlock *Header); + + /// \brief End the current loop. + void pop(); + + /// \brief Return the top loop id metadata. + llvm::MDNode *getCurLoopID() const { return getInfo().getLoopID(); } + + /// \brief Return true if the top loop is parallel. + bool getCurLoopParallel() const { + return hasInfo() ? getInfo().getAttributes().IsParallel : false; + } + + /// \brief Function called by the CodeGenFunction when an instruction is + /// created. + void InsertHelper(llvm::Instruction *I) const; + + /// \brief Set the next pushed loop as parallel. + void setParallel(bool Enable = true) { StagedAttrs.IsParallel = Enable; } + + /// \brief Set the next pushed loop 'vectorizer.enable' + void setVectorizerEnable(bool Enable = true) { + StagedAttrs.VectorizerEnable = + Enable ? LoopAttributes::VecEnable : LoopAttributes::VecDisable; + } + + /// \brief Set the vectorizer width for the next loop pushed. + void setVectorizerWidth(unsigned W) { StagedAttrs.VectorizerWidth = W; } + + /// \brief Set the vectorizer unroll for the next loop pushed. + void setVectorizerUnroll(unsigned U) { StagedAttrs.VectorizerUnroll = U; } + +private: + /// \brief Returns true if there is LoopInfo on the stack. 
+ bool hasInfo() const { return !Active.empty(); } + /// \brief Return the LoopInfo for the current loop. HasInfo should be called + /// first to ensure LoopInfo is present. + const LoopInfo &getInfo() const { return Active.back(); } + /// \brief The set of attributes that will be applied to the next pushed loop. + LoopAttributes StagedAttrs; + /// \brief Stack of active loops. + llvm::SmallVector<LoopInfo, 4> Active; +}; + +} // end namespace CodeGen +} // end namespace clang + +#endif // CLANG_CODEGEN_CGLOOPINFO_H diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp index 0bda053f35f7..8ca80808e007 100644 --- a/lib/CodeGen/CGObjC.cpp +++ b/lib/CodeGen/CGObjC.cpp @@ -22,7 +22,7 @@ #include "clang/Basic/Diagnostic.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Support/CallSite.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" using namespace clang; @@ -79,10 +79,10 @@ CodeGenFunction::EmitObjCBoxedExpr(const ObjCBoxedExpr *E) { RValue RV = EmitAnyExpr(SubExpr); CallArgList Args; Args.add(RV, ArgQT); - - RValue result = Runtime.GenerateMessageSend(*this, ReturnValueSlot(), - BoxingMethod->getResultType(), Sel, Receiver, Args, - ClassDecl, BoxingMethod); + + RValue result = Runtime.GenerateMessageSend( + *this, ReturnValueSlot(), BoxingMethod->getReturnType(), Sel, Receiver, + Args, ClassDecl, BoxingMethod); return Builder.CreateBitCast(result.getScalarVal(), ConvertType(E->getType())); } @@ -90,7 +90,7 @@ CodeGenFunction::EmitObjCBoxedExpr(const ObjCBoxedExpr *E) { llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, const ObjCMethodDecl *MethodWithObjects) { ASTContext &Context = CGM.getContext(); - const ObjCDictionaryLiteral *DLE = 0; + const ObjCDictionaryLiteral *DLE = nullptr; const ObjCArrayLiteral *ALE = dyn_cast<ObjCArrayLiteral>(E); if (!ALE) DLE = cast<ObjCDictionaryLiteral>(E); @@ -106,8 +106,8 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, ArrayType::Normal, /*IndexTypeQuals=*/0); // Allocate the temporary array(s). - llvm::Value *Objects = CreateMemTemp(ElementArrayType, "objects"); - llvm::Value *Keys = 0; + llvm::Value *Objects = CreateMemTemp(ElementArrayType, "objects"); + llvm::Value *Keys = nullptr; if (DLE) Keys = CreateMemTemp(ElementArrayType, "keys"); @@ -186,12 +186,9 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, llvm::Value *Receiver = Runtime.GetClass(*this, Class); // Generate the message send. - RValue result - = Runtime.GenerateMessageSend(*this, ReturnValueSlot(), - MethodWithObjects->getResultType(), - Sel, - Receiver, Args, Class, - MethodWithObjects); + RValue result = Runtime.GenerateMessageSend( + *this, ReturnValueSlot(), MethodWithObjects->getReturnType(), Sel, + Receiver, Args, Class, MethodWithObjects); // The above message send needs these objects, but in ARC they are // passed in a buffer that is essentially __unsafe_unretained. 
@@ -238,7 +235,7 @@ static RValue AdjustRelatedResultType(CodeGenFunction &CGF, return Result; if (!Method->hasRelatedResultType() || - CGF.getContext().hasSameType(ExpT, Method->getResultType()) || + CGF.getContext().hasSameType(ExpT, Method->getReturnType()) || !Result.isScalar()) return Result; @@ -317,10 +314,10 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, CGObjCRuntime &Runtime = CGM.getObjCRuntime(); bool isSuperMessage = false; bool isClassMessage = false; - ObjCInterfaceDecl *OID = 0; + ObjCInterfaceDecl *OID = nullptr; // Find the receiver QualType ReceiverType; - llvm::Value *Receiver = 0; + llvm::Value *Receiver = nullptr; switch (E->getReceiverKind()) { case ObjCMessageExpr::Instance: ReceiverType = E->getInstanceReceiver()->getType(); @@ -369,8 +366,7 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, shouldExtendReceiverForInnerPointerMessage(E)) Receiver = EmitARCRetainAutorelease(ReceiverType, Receiver); - QualType ResultType = - method ? method->getResultType() : E->getType(); + QualType ResultType = method ? method->getReturnType() : E->getType(); CallArgList Args; EmitCallArgs(Args, method, E->arg_begin(), E->arg_end()); @@ -435,7 +431,7 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, namespace { struct FinishARCDealloc : EHScopeStack::Cleanup { - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { const ObjCMethodDecl *method = cast<ObjCMethodDecl>(CGF.CurCodeDecl); const ObjCImplDecl *impl = cast<ObjCImplDecl>(method->getDeclContext()); @@ -470,7 +466,7 @@ void CodeGenFunction::StartObjCMethod(const ObjCMethodDecl *OMD, FunctionArgList args; // Check if we should generate debug info for this method. if (OMD->hasAttr<NoDebugAttr>()) - DebugInfo = NULL; // disable debug info indefinitely for this function + DebugInfo = nullptr; // disable debug info indefinitely for this function llvm::Function *Fn = CGM.getObjCRuntime().GenerateMethod(OMD, CD); @@ -480,13 +476,13 @@ void CodeGenFunction::StartObjCMethod(const ObjCMethodDecl *OMD, args.push_back(OMD->getSelfDecl()); args.push_back(OMD->getCmdDecl()); - for (ObjCMethodDecl::param_const_iterator PI = OMD->param_begin(), - E = OMD->param_end(); PI != E; ++PI) - args.push_back(*PI); + for (const auto *PI : OMD->params()) + args.push_back(PI); CurGD = OMD; - StartFunction(OMD, OMD->getResultType(), Fn, FI, args, StartLoc); + StartFunction(OMD, OMD->getReturnType(), Fn, FI, args, + OMD->getLocation(), StartLoc); // In ARC, certain methods get an extra cleanup. if (CGM.getLangOpts().ObjCAutoRefCount && @@ -506,8 +502,14 @@ static llvm::Value *emitARCRetainLoadOfScalar(CodeGenFunction &CGF, /// its pointer, name, and types registered in the class struture. void CodeGenFunction::GenerateObjCMethod(const ObjCMethodDecl *OMD) { StartObjCMethod(OMD, OMD->getClassInterface(), OMD->getLocStart()); - EmitStmt(OMD->getBody()); + PGO.assignRegionCounters(OMD, CurFn); + assert(isa<CompoundStmt>(OMD->getBody())); + RegionCounter Cnt = getPGORegionCounter(OMD->getBody()); + Cnt.beginRegion(Builder); + EmitCompoundStmtWithoutScope(*cast<CompoundStmt>(OMD->getBody())); FinishFunction(OMD->getBodyRBrace()); + PGO.emitInstrumentationData(); + PGO.destroyRegionCounters(); } /// emitStructGetterCall - Call the runtime function to load a property @@ -622,8 +624,8 @@ PropertyImplStrategy::PropertyImplStrategy(CodeGenModule &CGM, // Evaluate the ivar's size and alignment. 
ObjCIvarDecl *ivar = propImpl->getPropertyIvarDecl(); QualType ivarType = ivar->getType(); - llvm::tie(IvarSize, IvarAlignment) - = CGM.getContext().getTypeInfoInChars(ivarType); + std::tie(IvarSize, IvarAlignment) = + CGM.getContext().getTypeInfoInChars(ivarType); // If we have a copy property, we always have to use getProperty/setProperty. // TODO: we could actually use setProperty and an expression for non-atomics. @@ -818,7 +820,7 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, if (!hasTrivialGetExpr(propImpl)) { if (!AtomicHelperFn) { ReturnStmt ret(SourceLocation(), propImpl->getGetterCXXConstructor(), - /*nrvo*/ 0); + /*nrvo*/ nullptr); EmitReturnStmt(ret); } else { @@ -895,16 +897,21 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, // FIXME: We shouldn't need to get the function info here, the // runtime already should have computed it to build the function. + llvm::Instruction *CallInstruction; RValue RV = EmitCall(getTypes().arrangeFreeFunctionCall(propType, args, FunctionType::ExtInfo(), RequiredArgs::All), - getPropertyFn, ReturnValueSlot(), args); + getPropertyFn, ReturnValueSlot(), args, nullptr, + &CallInstruction); + if (llvm::CallInst *call = dyn_cast<llvm::CallInst>(CallInstruction)) + call->setTailCall(); // We need to fix the type here. Ivars with copy & retain are // always objects so we don't need to worry about complex or // aggregates. - RV = RValue::get(Builder.CreateBitCast(RV.getScalarVal(), - getTypes().ConvertType(getterMethod->getResultType()))); + RV = RValue::get(Builder.CreateBitCast( + RV.getScalarVal(), + getTypes().ConvertType(getterMethod->getReturnType()))); EmitReturnOfRValue(RV, propType); @@ -955,8 +962,8 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, } value = Builder.CreateBitCast(value, ConvertType(propType)); - value = Builder.CreateBitCast(value, - ConvertType(GetterMethodDecl->getResultType())); + value = Builder.CreateBitCast( + value, ConvertType(GetterMethodDecl->getReturnType())); } EmitReturnOfRValue(RValue::get(value), propType); @@ -1139,9 +1146,9 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, case PropertyImplStrategy::GetSetProperty: case PropertyImplStrategy::SetPropertyAndExpressionGet: { - - llvm::Value *setOptimizedPropertyFn = 0; - llvm::Value *setPropertyFn = 0; + + llvm::Value *setOptimizedPropertyFn = nullptr; + llvm::Value *setPropertyFn = nullptr; if (UseOptimizedSetter(CGM)) { // 10.8 and iOS 6.0 code and GC is off setOptimizedPropertyFn = @@ -1292,7 +1299,7 @@ namespace { : addr(addr), ivar(ivar), destroyer(destroyer), useEHCleanupForArray(useEHCleanupForArray) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { LValue lvalue = CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), addr, ivar, /*CVR*/ 0); CGF.emitDestroy(lvalue.getAddress(), ivar->getType(), destroyer, @@ -1324,7 +1331,7 @@ static void emitCXXDestructMethod(CodeGenFunction &CGF, QualType::DestructionKind dtorKind = type.isDestructedType(); if (!dtorKind) continue; - CodeGenFunction::Destroyer *destroyer = 0; + CodeGenFunction::Destroyer *destroyer = nullptr; // Use a call to objc_storeStrong to destroy strong ivars, for the // general benefit of the tools. @@ -1356,12 +1363,9 @@ void CodeGenFunction::GenerateObjCCtorDtorMethod(ObjCImplementationDecl *IMP, // Suppress the final autorelease in ARC. 
AutoreleaseResult = false; - SmallVector<CXXCtorInitializer *, 8> IvarInitializers; - for (ObjCImplementationDecl::init_const_iterator B = IMP->init_begin(), - E = IMP->init_end(); B != E; ++B) { - CXXCtorInitializer *IvarInit = (*B); + for (const auto *IvarInit : IMP->inits()) { FieldDecl *Field = IvarInit->getAnyMember(); - ObjCIvarDecl *Ivar = cast<ObjCIvarDecl>(Field); + ObjCIvarDecl *Ivar = cast<ObjCIvarDecl>(Field); LValue LV = EmitLValueForIvar(TypeOfSelfObject(), LoadObjCSelf(), Ivar, 0); EmitAggExpr(IvarInit->getInit(), @@ -1506,9 +1510,13 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ llvm::Value *zero = llvm::Constant::getNullValue(UnsignedLongLTy); // If the limit pointer was zero to begin with, the collection is - // empty; skip all this. + // empty; skip all this. Set the branch weight assuming this has the same + // probability of exiting the loop as any other loop exit. + uint64_t EntryCount = PGO.getCurrentRegionCount(); + RegionCounter Cnt = getPGORegionCounter(&S); Builder.CreateCondBr(Builder.CreateICmpEQ(initialBufferLimit, zero, "iszero"), - EmptyBB, LoopInitBB); + EmptyBB, LoopInitBB, + PGO.createBranchWeights(EntryCount, Cnt.getCount())); // Otherwise, initialize the loop. EmitBlock(LoopInitBB); @@ -1537,6 +1545,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ llvm::PHINode *count = Builder.CreatePHI(UnsignedLongLTy, 3, "forcoll.count"); count->addIncoming(initialBufferLimit, LoopInitBB); + Cnt.beginRegion(Builder); + // Check whether the mutations value has changed from where it was // at start. StateMutationsPtr should actually be invariant between // refreshes. @@ -1644,8 +1654,12 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ = Builder.CreateAdd(index, llvm::ConstantInt::get(UnsignedLongLTy, 1)); // If we haven't overrun the buffer yet, we can continue. + // Set the branch weights based on the simplifying assumption that this is + // like a while-loop, i.e., ignoring that the false branch fetches more + // elements and then returns to the loop. Builder.CreateCondBr(Builder.CreateICmpULT(indexPlusOne, count), - LoopBodyBB, FetchMoreBB); + LoopBodyBB, FetchMoreBB, + PGO.createBranchWeights(Cnt.getCount(), EntryCount)); index->addIncoming(indexPlusOne, AfterBody.getBlock()); count->addIncoming(count, AfterBody.getBlock()); @@ -1715,7 +1729,7 @@ namespace { CallObjCRelease(llvm::Value *object) : object(object) {} llvm::Value *object; - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { // Releases at the end of the full-expression are imprecise. 
CGF.EmitARCRelease(object, ARCImpreciseLifetime); } @@ -1857,7 +1871,7 @@ static llvm::Value *emitARCStoreOperation(CodeGenFunction &CGF, }; llvm::CallInst *result = CGF.EmitNounwindRuntimeCall(fn, args); - if (ignored) return 0; + if (ignored) return nullptr; return CGF.Builder.CreateBitCast(result, origType); } @@ -2055,7 +2069,7 @@ llvm::Value *CodeGenFunction::EmitARCStoreStrongCall(llvm::Value *addr, }; EmitNounwindRuntimeCall(fn, args); - if (ignored) return 0; + if (ignored) return nullptr; return value; } @@ -2324,7 +2338,7 @@ namespace { CallObjCAutoreleasePoolObject(llvm::Value *token) : Token(token) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { CGF.EmitObjCAutoreleasePoolPop(Token); } }; @@ -2333,7 +2347,7 @@ namespace { CallObjCMRRAutoreleasePoolObject(llvm::Value *token) : Token(token) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { CGF.EmitObjCMRRAutoreleasePoolPop(Token); } }; @@ -2550,7 +2564,7 @@ tryEmitARCRetainScalarExpr(CodeGenFunction &CGF, const Expr *e) { // The desired result type, if it differs from the type of the // ultimate opaque expression. - llvm::Type *resultType = 0; + llvm::Type *resultType = nullptr; while (true) { e = e->IgnoreParens(); @@ -2824,9 +2838,8 @@ void CodeGenFunction::EmitObjCAutoreleasePoolStmt( EHStack.pushCleanup<CallObjCMRRAutoreleasePoolObject>(NormalCleanup, token); } - for (CompoundStmt::const_body_iterator I = S.body_begin(), - E = S.body_end(); I != E; ++I) - EmitStmt(*I); + for (const auto *I : S.body()) + EmitStmt(I); if (DI) DI->EmitLexicalBlockEnd(Builder, S.getRBracLoc()); @@ -2857,16 +2870,16 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( const ObjCPropertyImplDecl *PID) { if (!getLangOpts().CPlusPlus || !getLangOpts().ObjCRuntime.hasAtomicCopyHelper()) - return 0; + return nullptr; QualType Ty = PID->getPropertyIvarDecl()->getType(); if (!Ty->isRecordType()) - return 0; + return nullptr; const ObjCPropertyDecl *PD = PID->getPropertyDecl(); if ((!(PD->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_atomic))) - return 0; - llvm::Constant * HelperFn = 0; + return nullptr; + llvm::Constant *HelperFn = nullptr; if (hasTrivialSetExpr(PID)) - return 0; + return nullptr; assert(PID->getSetterCXXAssignment() && "SetterCXXAssignment - null"); if ((HelperFn = CGM.getAtomicSetterHelperFnMap(Ty))) return HelperFn; @@ -2877,27 +2890,25 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( FunctionDecl *FD = FunctionDecl::Create(C, C.getTranslationUnitDecl(), SourceLocation(), - SourceLocation(), II, C.VoidTy, 0, - SC_Static, + SourceLocation(), II, C.VoidTy, + nullptr, SC_Static, false, false); - + QualType DestTy = C.getPointerType(Ty); QualType SrcTy = Ty; SrcTy.addConst(); SrcTy = C.getPointerType(SrcTy); FunctionArgList args; - ImplicitParamDecl dstDecl(FD, SourceLocation(), 0, DestTy); + ImplicitParamDecl dstDecl(getContext(), FD, SourceLocation(), nullptr,DestTy); args.push_back(&dstDecl); - ImplicitParamDecl srcDecl(FD, SourceLocation(), 0, SrcTy); + ImplicitParamDecl srcDecl(getContext(), FD, SourceLocation(), nullptr, SrcTy); args.push_back(&srcDecl); - - const CGFunctionInfo &FI = - CGM.getTypes().arrangeFunctionDeclaration(C.VoidTy, args, - FunctionType::ExtInfo(), - RequiredArgs::All); - + + const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( + C.VoidTy, args, FunctionType::ExtInfo(), RequiredArgs::All); + llvm::FunctionType *LTy = 
CGM.getTypes().GetFunctionType(FI); llvm::Function *Fn = @@ -2905,7 +2916,7 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( "__assign_helper_atomic_property_", &CGM.getModule()); - StartFunction(FD, C.VoidTy, Fn, FI, args, SourceLocation()); + StartFunction(FD, C.VoidTy, Fn, FI, args); DeclRefExpr DstExpr(&dstDecl, false, DestTy, VK_RValue, SourceLocation()); @@ -2936,17 +2947,17 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( const ObjCPropertyImplDecl *PID) { if (!getLangOpts().CPlusPlus || !getLangOpts().ObjCRuntime.hasAtomicCopyHelper()) - return 0; + return nullptr; const ObjCPropertyDecl *PD = PID->getPropertyDecl(); QualType Ty = PD->getType(); if (!Ty->isRecordType()) - return 0; + return nullptr; if ((!(PD->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_atomic))) - return 0; - llvm::Constant * HelperFn = 0; - + return nullptr; + llvm::Constant *HelperFn = nullptr; + if (hasTrivialGetExpr(PID)) - return 0; + return nullptr; assert(PID->getGetterCXXConstructor() && "getGetterCXXConstructor - null"); if ((HelperFn = CGM.getAtomicGetterHelperFnMap(Ty))) return HelperFn; @@ -2958,34 +2969,32 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( FunctionDecl *FD = FunctionDecl::Create(C, C.getTranslationUnitDecl(), SourceLocation(), - SourceLocation(), II, C.VoidTy, 0, - SC_Static, + SourceLocation(), II, C.VoidTy, + nullptr, SC_Static, false, false); - + QualType DestTy = C.getPointerType(Ty); QualType SrcTy = Ty; SrcTy.addConst(); SrcTy = C.getPointerType(SrcTy); FunctionArgList args; - ImplicitParamDecl dstDecl(FD, SourceLocation(), 0, DestTy); + ImplicitParamDecl dstDecl(getContext(), FD, SourceLocation(), nullptr,DestTy); args.push_back(&dstDecl); - ImplicitParamDecl srcDecl(FD, SourceLocation(), 0, SrcTy); + ImplicitParamDecl srcDecl(getContext(), FD, SourceLocation(), nullptr, SrcTy); args.push_back(&srcDecl); - - const CGFunctionInfo &FI = - CGM.getTypes().arrangeFunctionDeclaration(C.VoidTy, args, - FunctionType::ExtInfo(), - RequiredArgs::All); - + + const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( + C.VoidTy, args, FunctionType::ExtInfo(), RequiredArgs::All); + llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI); llvm::Function *Fn = llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage, "__copy_helper_atomic_property_", &CGM.getModule()); - StartFunction(FD, C.VoidTy, Fn, FI, args, SourceLocation()); + StartFunction(FD, C.VoidTy, Fn, FI, args); DeclRefExpr SrcExpr(&srcDecl, false, SrcTy, VK_RValue, SourceLocation()); @@ -3012,6 +3021,7 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( ConstructorArgs, CXXConstExpr->hadMultipleCandidates(), CXXConstExpr->isListInitialization(), + CXXConstExpr->isStdInitListInitialization(), CXXConstExpr->requiresZeroInitialization(), CXXConstExpr->getConstructionKind(), SourceRange()); @@ -3050,11 +3060,11 @@ CodeGenFunction::EmitBlockCopyAndAutorelease(llvm::Value *Block, QualType Ty) { RValue Result; Result = Runtime.GenerateMessageSend(*this, ReturnValueSlot(), Ty, CopySelector, - Val, CallArgList(), 0, 0); + Val, CallArgList(), nullptr, nullptr); Val = Result.getScalarVal(); Result = Runtime.GenerateMessageSend(*this, ReturnValueSlot(), Ty, AutoreleaseSelector, - Val, CallArgList(), 0, 0); + Val, CallArgList(), nullptr, nullptr); Val = Result.getScalarVal(); return Val; } diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp index a7ab8507014f..619a66ab4a69 100644 --- a/lib/CodeGen/CGObjCGNU.cpp +++ 
b/lib/CodeGen/CGObjCGNU.cpp @@ -27,11 +27,11 @@ #include "clang/Basic/SourceManager.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/Compiler.h" #include <cstdarg> @@ -53,7 +53,8 @@ class LazyRuntimeFunction { /// Constructor leaves this class uninitialized, because it is intended to /// be used as a field in another class and not all of the types that are /// used as arguments will necessarily be available at construction time. - LazyRuntimeFunction() : CGM(0), FunctionName(0), Function(0) {} + LazyRuntimeFunction() + : CGM(nullptr), FunctionName(nullptr), Function(nullptr) {} /// Initialises the lazy function with the name, return type, and the types /// of the arguments. @@ -62,7 +63,7 @@ class LazyRuntimeFunction { llvm::Type *RetTy, ...) { CGM =Mod; FunctionName = name; - Function = 0; + Function = nullptr; ArgTys.clear(); va_list Args; va_start(Args, RetTy); @@ -76,7 +77,7 @@ class LazyRuntimeFunction { /// LLVM constant. operator llvm::Constant*() { if (!Function) { - if (0 == FunctionName) return 0; + if (!FunctionName) return nullptr; // We put the return type on the end of the vector, so pop it back off llvm::Type *RetTy = ArgTys.back(); ArgTys.pop_back(); @@ -236,9 +237,8 @@ protected: NameAndAttributes += TypeStr; NameAndAttributes += '\0'; NameAndAttributes += PD->getNameAsString(); - NameAndAttributes += '\0'; return llvm::ConstantExpr::getGetElementPtr( - CGM.GetAddrOfConstantString(NameAndAttributes), Zeros); + CGM.GetAddrOfConstantCString(NameAndAttributes), Zeros); } return MakeConstantString(PD->getNameAsString()); } @@ -479,103 +479,92 @@ public: CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, unsigned protocolClassVersion); - virtual llvm::Constant *GenerateConstantString(const StringLiteral *); + llvm::Constant *GenerateConstantString(const StringLiteral *) override; - virtual RValue - GenerateMessageSend(CodeGenFunction &CGF, - ReturnValueSlot Return, - QualType ResultType, - Selector Sel, - llvm::Value *Receiver, - const CallArgList &CallArgs, + RValue + GenerateMessageSend(CodeGenFunction &CGF, ReturnValueSlot Return, + QualType ResultType, Selector Sel, + llvm::Value *Receiver, const CallArgList &CallArgs, const ObjCInterfaceDecl *Class, - const ObjCMethodDecl *Method); - virtual RValue - GenerateMessageSendSuper(CodeGenFunction &CGF, - ReturnValueSlot Return, - QualType ResultType, - Selector Sel, + const ObjCMethodDecl *Method) override; + RValue + GenerateMessageSendSuper(CodeGenFunction &CGF, ReturnValueSlot Return, + QualType ResultType, Selector Sel, const ObjCInterfaceDecl *Class, - bool isCategoryImpl, - llvm::Value *Receiver, - bool IsClassMessage, - const CallArgList &CallArgs, - const ObjCMethodDecl *Method); - virtual llvm::Value *GetClass(CodeGenFunction &CGF, - const ObjCInterfaceDecl *OID); - virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, - bool lval = false); - virtual llvm::Value *GetSelector(CodeGenFunction &CGF, const ObjCMethodDecl - *Method); - virtual llvm::Constant *GetEHType(QualType T); - - virtual llvm::Function *GenerateMethod(const ObjCMethodDecl *OMD, - const ObjCContainerDecl *CD); - virtual void GenerateCategory(const ObjCCategoryImplDecl *CMD); - virtual void GenerateClass(const ObjCImplementationDecl *ClassDecl); - virtual void RegisterAlias(const 
ObjCCompatibleAliasDecl *OAD); - virtual llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF, - const ObjCProtocolDecl *PD); - virtual void GenerateProtocol(const ObjCProtocolDecl *PD); - virtual llvm::Function *ModuleInitFunction(); - virtual llvm::Constant *GetPropertyGetFunction(); - virtual llvm::Constant *GetPropertySetFunction(); - virtual llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, - bool copy); - virtual llvm::Constant *GetSetStructFunction(); - virtual llvm::Constant *GetGetStructFunction(); - virtual llvm::Constant *GetCppAtomicObjectGetFunction(); - virtual llvm::Constant *GetCppAtomicObjectSetFunction(); - virtual llvm::Constant *EnumerationMutationFunction(); - - virtual void EmitTryStmt(CodeGenFunction &CGF, - const ObjCAtTryStmt &S); - virtual void EmitSynchronizedStmt(CodeGenFunction &CGF, - const ObjCAtSynchronizedStmt &S); - virtual void EmitThrowStmt(CodeGenFunction &CGF, - const ObjCAtThrowStmt &S, - bool ClearInsertionPoint=true); - virtual llvm::Value * EmitObjCWeakRead(CodeGenFunction &CGF, - llvm::Value *AddrWeakObj); - virtual void EmitObjCWeakAssign(CodeGenFunction &CGF, - llvm::Value *src, llvm::Value *dst); - virtual void EmitObjCGlobalAssign(CodeGenFunction &CGF, - llvm::Value *src, llvm::Value *dest, - bool threadlocal=false); - virtual void EmitObjCIvarAssign(CodeGenFunction &CGF, - llvm::Value *src, llvm::Value *dest, - llvm::Value *ivarOffset); - virtual void EmitObjCStrongCastAssign(CodeGenFunction &CGF, - llvm::Value *src, llvm::Value *dest); - virtual void EmitGCMemmoveCollectable(CodeGenFunction &CGF, - llvm::Value *DestPtr, - llvm::Value *SrcPtr, - llvm::Value *Size); - virtual LValue EmitObjCValueForIvar(CodeGenFunction &CGF, - QualType ObjectTy, - llvm::Value *BaseValue, - const ObjCIvarDecl *Ivar, - unsigned CVRQualifiers); - virtual llvm::Value *EmitIvarOffset(CodeGenFunction &CGF, - const ObjCInterfaceDecl *Interface, - const ObjCIvarDecl *Ivar); - virtual llvm::Value *EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF); - virtual llvm::Constant *BuildGCBlockLayout(CodeGenModule &CGM, - const CGBlockInfo &blockInfo) { + bool isCategoryImpl, llvm::Value *Receiver, + bool IsClassMessage, const CallArgList &CallArgs, + const ObjCMethodDecl *Method) override; + llvm::Value *GetClass(CodeGenFunction &CGF, + const ObjCInterfaceDecl *OID) override; + llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, + bool lval = false) override; + llvm::Value *GetSelector(CodeGenFunction &CGF, + const ObjCMethodDecl *Method) override; + llvm::Constant *GetEHType(QualType T) override; + + llvm::Function *GenerateMethod(const ObjCMethodDecl *OMD, + const ObjCContainerDecl *CD) override; + void GenerateCategory(const ObjCCategoryImplDecl *CMD) override; + void GenerateClass(const ObjCImplementationDecl *ClassDecl) override; + void RegisterAlias(const ObjCCompatibleAliasDecl *OAD) override; + llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF, + const ObjCProtocolDecl *PD) override; + void GenerateProtocol(const ObjCProtocolDecl *PD) override; + llvm::Function *ModuleInitFunction() override; + llvm::Constant *GetPropertyGetFunction() override; + llvm::Constant *GetPropertySetFunction() override; + llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, + bool copy) override; + llvm::Constant *GetSetStructFunction() override; + llvm::Constant *GetGetStructFunction() override; + llvm::Constant *GetCppAtomicObjectGetFunction() override; + llvm::Constant *GetCppAtomicObjectSetFunction() override; + llvm::Constant 
*EnumerationMutationFunction() override; + + void EmitTryStmt(CodeGenFunction &CGF, + const ObjCAtTryStmt &S) override; + void EmitSynchronizedStmt(CodeGenFunction &CGF, + const ObjCAtSynchronizedStmt &S) override; + void EmitThrowStmt(CodeGenFunction &CGF, + const ObjCAtThrowStmt &S, + bool ClearInsertionPoint=true) override; + llvm::Value * EmitObjCWeakRead(CodeGenFunction &CGF, + llvm::Value *AddrWeakObj) override; + void EmitObjCWeakAssign(CodeGenFunction &CGF, + llvm::Value *src, llvm::Value *dst) override; + void EmitObjCGlobalAssign(CodeGenFunction &CGF, + llvm::Value *src, llvm::Value *dest, + bool threadlocal=false) override; + void EmitObjCIvarAssign(CodeGenFunction &CGF, llvm::Value *src, + llvm::Value *dest, llvm::Value *ivarOffset) override; + void EmitObjCStrongCastAssign(CodeGenFunction &CGF, + llvm::Value *src, llvm::Value *dest) override; + void EmitGCMemmoveCollectable(CodeGenFunction &CGF, llvm::Value *DestPtr, + llvm::Value *SrcPtr, + llvm::Value *Size) override; + LValue EmitObjCValueForIvar(CodeGenFunction &CGF, QualType ObjectTy, + llvm::Value *BaseValue, const ObjCIvarDecl *Ivar, + unsigned CVRQualifiers) override; + llvm::Value *EmitIvarOffset(CodeGenFunction &CGF, + const ObjCInterfaceDecl *Interface, + const ObjCIvarDecl *Ivar) override; + llvm::Value *EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) override; + llvm::Constant *BuildGCBlockLayout(CodeGenModule &CGM, + const CGBlockInfo &blockInfo) override { return NULLPtr; } - virtual llvm::Constant *BuildRCBlockLayout(CodeGenModule &CGM, - const CGBlockInfo &blockInfo) { + llvm::Constant *BuildRCBlockLayout(CodeGenModule &CGM, + const CGBlockInfo &blockInfo) override { return NULLPtr; } - - virtual llvm::Constant *BuildByrefLayout(CodeGenModule &CGM, - QualType T) { + + llvm::Constant *BuildByrefLayout(CodeGenModule &CGM, QualType T) override { return NULLPtr; } - - virtual llvm::GlobalVariable *GetClassGlobal(const std::string &Name) { - return 0; + + llvm::GlobalVariable *GetClassGlobal(const std::string &Name, + bool Weak = false) override { + return nullptr; } }; /// Class representing the legacy GCC Objective-C ABI. This is the default when @@ -595,11 +584,9 @@ class CGObjCGCC : public CGObjCGNU { /// arguments. Returns the IMP for the corresponding method. 
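The block above is typical of the mechanical change applied to every runtime class in this file: the redundant leading "virtual" is dropped and "override" is added, so the compiler checks each declaration against CGObjCRuntime. A minimal, self-contained illustration with hypothetical types (not from the patch):

    struct RuntimeIface {
      virtual const char *EnumerationMutationFunction() = 0;
      virtual ~RuntimeIface() = default;
    };

    struct GNUStyleRuntime : RuntimeIface {
      // Still virtual, but the build now fails if the base signature drifts.
      const char *EnumerationMutationFunction() override {
        return "objc_enumerationMutation";
      }
    };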
LazyRuntimeFunction MsgLookupSuperFn; protected: - virtual llvm::Value *LookupIMP(CodeGenFunction &CGF, - llvm::Value *&Receiver, - llvm::Value *cmd, - llvm::MDNode *node, - MessageSendInfo &MSI) { + llvm::Value *LookupIMP(CodeGenFunction &CGF, llvm::Value *&Receiver, + llvm::Value *cmd, llvm::MDNode *node, + MessageSendInfo &MSI) override { CGBuilderTy &Builder = CGF.Builder; llvm::Value *args[] = { EnforceType(Builder, Receiver, IdTy), @@ -608,10 +595,8 @@ protected: imp->setMetadata(msgSendMDKind, node); return imp.getInstruction(); } - virtual llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, - llvm::Value *ObjCSuper, - llvm::Value *cmd, - MessageSendInfo &MSI) { + llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, llvm::Value *ObjCSuper, + llvm::Value *cmd, MessageSendInfo &MSI) override { CGBuilderTy &Builder = CGF.Builder; llvm::Value *lookupArgs[] = {EnforceType(Builder, ObjCSuper, PtrToObjCSuperTy), cmd}; @@ -620,10 +605,11 @@ protected: public: CGObjCGCC(CodeGenModule &Mod) : CGObjCGNU(Mod, 8, 2) { // IMP objc_msg_lookup(id, SEL); - MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy, NULL); + MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy, + nullptr); // IMP objc_msg_lookup_super(struct objc_super*, SEL); MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy, - PtrToObjCSuperTy, SelectorTy, NULL); + PtrToObjCSuperTy, SelectorTy, nullptr); } }; /// Class used when targeting the new GNUstep runtime ABI. @@ -654,13 +640,11 @@ class CGObjCGNUstep : public CGObjCGNU { /// lookup functions. llvm::Type *SlotTy; public: - virtual llvm::Constant *GetEHType(QualType T); + llvm::Constant *GetEHType(QualType T) override; protected: - virtual llvm::Value *LookupIMP(CodeGenFunction &CGF, - llvm::Value *&Receiver, - llvm::Value *cmd, - llvm::MDNode *node, - MessageSendInfo &MSI) { + llvm::Value *LookupIMP(CodeGenFunction &CGF, llvm::Value *&Receiver, + llvm::Value *cmd, llvm::MDNode *node, + MessageSendInfo &MSI) override { CGBuilderTy &Builder = CGF.Builder; llvm::Function *LookupFn = SlotLookupFn; @@ -696,10 +680,9 @@ class CGObjCGNUstep : public CGObjCGNU { Receiver = Builder.CreateLoad(ReceiverPtr, true); return imp; } - virtual llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, - llvm::Value *ObjCSuper, - llvm::Value *cmd, - MessageSendInfo &MSI) { + llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, llvm::Value *ObjCSuper, + llvm::Value *cmd, + MessageSendInfo &MSI) override { CGBuilderTy &Builder = CGF.Builder; llvm::Value *lookupArgs[] = {ObjCSuper, cmd}; @@ -714,68 +697,68 @@ class CGObjCGNUstep : public CGObjCGNU { const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime; llvm::StructType *SlotStructTy = llvm::StructType::get(PtrTy, - PtrTy, PtrTy, IntTy, IMPTy, NULL); + PtrTy, PtrTy, IntTy, IMPTy, nullptr); SlotTy = llvm::PointerType::getUnqual(SlotStructTy); // Slot_t objc_msg_lookup_sender(id *receiver, SEL selector, id sender); SlotLookupFn.init(&CGM, "objc_msg_lookup_sender", SlotTy, PtrToIdTy, - SelectorTy, IdTy, NULL); + SelectorTy, IdTy, nullptr); // Slot_t objc_msg_lookup_super(struct objc_super*, SEL); SlotLookupSuperFn.init(&CGM, "objc_slot_lookup_super", SlotTy, - PtrToObjCSuperTy, SelectorTy, NULL); + PtrToObjCSuperTy, SelectorTy, nullptr); // If we're in ObjC++ mode, then we want to make if (CGM.getLangOpts().CPlusPlus) { llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); // void *__cxa_begin_catch(void *e) - EnterCatchFn.init(&CGM, "__cxa_begin_catch", PtrTy, PtrTy, NULL); + EnterCatchFn.init(&CGM, "__cxa_begin_catch", PtrTy, PtrTy, 
nullptr); // void __cxa_end_catch(void) - ExitCatchFn.init(&CGM, "__cxa_end_catch", VoidTy, NULL); + ExitCatchFn.init(&CGM, "__cxa_end_catch", VoidTy, nullptr); // void _Unwind_Resume_or_Rethrow(void*) ExceptionReThrowFn.init(&CGM, "_Unwind_Resume_or_Rethrow", VoidTy, - PtrTy, NULL); + PtrTy, nullptr); } else if (R.getVersion() >= VersionTuple(1, 7)) { llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); // id objc_begin_catch(void *e) - EnterCatchFn.init(&CGM, "objc_begin_catch", IdTy, PtrTy, NULL); + EnterCatchFn.init(&CGM, "objc_begin_catch", IdTy, PtrTy, nullptr); // void objc_end_catch(void) - ExitCatchFn.init(&CGM, "objc_end_catch", VoidTy, NULL); + ExitCatchFn.init(&CGM, "objc_end_catch", VoidTy, nullptr); // void _Unwind_Resume_or_Rethrow(void*) ExceptionReThrowFn.init(&CGM, "objc_exception_rethrow", VoidTy, - PtrTy, NULL); + PtrTy, nullptr); } llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); SetPropertyAtomic.init(&CGM, "objc_setProperty_atomic", VoidTy, IdTy, - SelectorTy, IdTy, PtrDiffTy, NULL); + SelectorTy, IdTy, PtrDiffTy, nullptr); SetPropertyAtomicCopy.init(&CGM, "objc_setProperty_atomic_copy", VoidTy, - IdTy, SelectorTy, IdTy, PtrDiffTy, NULL); + IdTy, SelectorTy, IdTy, PtrDiffTy, nullptr); SetPropertyNonAtomic.init(&CGM, "objc_setProperty_nonatomic", VoidTy, - IdTy, SelectorTy, IdTy, PtrDiffTy, NULL); + IdTy, SelectorTy, IdTy, PtrDiffTy, nullptr); SetPropertyNonAtomicCopy.init(&CGM, "objc_setProperty_nonatomic_copy", - VoidTy, IdTy, SelectorTy, IdTy, PtrDiffTy, NULL); + VoidTy, IdTy, SelectorTy, IdTy, PtrDiffTy, nullptr); // void objc_setCppObjectAtomic(void *dest, const void *src, void // *helper); CxxAtomicObjectSetFn.init(&CGM, "objc_setCppObjectAtomic", VoidTy, PtrTy, - PtrTy, PtrTy, NULL); + PtrTy, PtrTy, nullptr); // void objc_getCppObjectAtomic(void *dest, const void *src, void // *helper); CxxAtomicObjectGetFn.init(&CGM, "objc_getCppObjectAtomic", VoidTy, PtrTy, - PtrTy, PtrTy, NULL); + PtrTy, PtrTy, nullptr); } - virtual llvm::Constant *GetCppAtomicObjectGetFunction() { + llvm::Constant *GetCppAtomicObjectGetFunction() override { // The optimised functions were added in version 1.7 of the GNUstep // runtime. assert (CGM.getLangOpts().ObjCRuntime.getVersion() >= VersionTuple(1, 7)); return CxxAtomicObjectGetFn; } - virtual llvm::Constant *GetCppAtomicObjectSetFunction() { + llvm::Constant *GetCppAtomicObjectSetFunction() override { // The optimised functions were added in version 1.7 of the GNUstep // runtime. assert (CGM.getLangOpts().ObjCRuntime.getVersion() >= VersionTuple(1, 7)); return CxxAtomicObjectSetFn; } - virtual llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, - bool copy) { + llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, + bool copy) override { // The optimised property functions omit the GC check, and so are not // safe to use in GC mode. The standard functions are fast in GC mode, // so there is less advantage in using them. @@ -789,10 +772,8 @@ class CGObjCGNUstep : public CGObjCGNU { if (copy) return SetPropertyAtomicCopy; return SetPropertyAtomic; } - if (copy) return SetPropertyNonAtomicCopy; - return SetPropertyNonAtomic; - return 0; + return copy ? SetPropertyNonAtomicCopy : SetPropertyNonAtomic; } }; @@ -810,11 +791,9 @@ protected: /// arguments. Returns the IMP for the corresponding method. 
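These init() calls are also why the NULL-to-nullptr churn matters here: the terminator is read back through va_arg as a pointer, and nullptr is always pointer-sized, whereas NULL may expand to a plain integer 0. A simplified sketch of the sentinel-terminated pattern (illustrative names, not the real LazyRuntimeFunction):

    #include <cstdarg>
    #include <vector>

    class LazyFn {
      std::vector<void *> ArgTys;        // stand-in for llvm::Type*
      const char *FunctionName = nullptr;
    public:
      // Parameter types follow RetTy and must end with a null pointer sentinel,
      // e.g. fn.init("objc_msg_lookup", RetTy, IdTy, SelTy, nullptr);
      void init(const char *name, void *RetTy, ...) {
        FunctionName = name;
        ArgTys.clear();
        va_list Args;
        va_start(Args, RetTy);
        while (void *ArgTy = va_arg(Args, void *))
          ArgTys.push_back(ArgTy);
        va_end(Args);
        ArgTys.push_back(RetTy);         // return type kept at the end, as above
      }
    };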
LazyRuntimeFunction MsgLookupSuperFn, MsgLookupSuperFnSRet; - virtual llvm::Value *LookupIMP(CodeGenFunction &CGF, - llvm::Value *&Receiver, - llvm::Value *cmd, - llvm::MDNode *node, - MessageSendInfo &MSI) { + llvm::Value *LookupIMP(CodeGenFunction &CGF, llvm::Value *&Receiver, + llvm::Value *cmd, llvm::MDNode *node, + MessageSendInfo &MSI) override { CGBuilderTy &Builder = CGF.Builder; llvm::Value *args[] = { EnforceType(Builder, Receiver, IdTy), @@ -830,10 +809,8 @@ protected: return imp.getInstruction(); } - virtual llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, - llvm::Value *ObjCSuper, - llvm::Value *cmd, - MessageSendInfo &MSI) { + llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, llvm::Value *ObjCSuper, + llvm::Value *cmd, MessageSendInfo &MSI) override { CGBuilderTy &Builder = CGF.Builder; llvm::Value *lookupArgs[] = {EnforceType(Builder, ObjCSuper, PtrToObjCSuperTy), cmd}; @@ -844,8 +821,8 @@ protected: return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs); } - virtual llvm::Value *GetClassNamed(CodeGenFunction &CGF, - const std::string &Name, bool isWeak) { + llvm::Value *GetClassNamed(CodeGenFunction &CGF, + const std::string &Name, bool isWeak) override { if (isWeak) return CGObjCGNU::GetClassNamed(CGF, Name, isWeak); @@ -858,7 +835,7 @@ protected: if (!ClassSymbol) ClassSymbol = new llvm::GlobalVariable(TheModule, LongTy, false, llvm::GlobalValue::ExternalLinkage, - 0, SymbolName); + nullptr, SymbolName); return ClassSymbol; } @@ -866,14 +843,14 @@ protected: public: CGObjCObjFW(CodeGenModule &Mod): CGObjCGNU(Mod, 9, 3) { // IMP objc_msg_lookup(id, SEL); - MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy, NULL); + MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy, nullptr); MsgLookupFnSRet.init(&CGM, "objc_msg_lookup_stret", IMPTy, IdTy, - SelectorTy, NULL); + SelectorTy, nullptr); // IMP objc_msg_lookup_super(struct objc_super*, SEL); MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy, - PtrToObjCSuperTy, SelectorTy, NULL); + PtrToObjCSuperTy, SelectorTy, nullptr); MsgLookupSuperFnSRet.init(&CGM, "objc_msg_lookup_super_stret", IMPTy, - PtrToObjCSuperTy, SelectorTy, NULL); + PtrToObjCSuperTy, SelectorTy, nullptr); } }; } // end anonymous namespace @@ -891,7 +868,8 @@ void CGObjCGNU::EmitClassRef(const std::string &className) { llvm::GlobalVariable *ClassSymbol = TheModule.getGlobalVariable(symbolName); if (!ClassSymbol) { ClassSymbol = new llvm::GlobalVariable(TheModule, LongTy, false, - llvm::GlobalValue::ExternalLinkage, 0, symbolName); + llvm::GlobalValue::ExternalLinkage, + nullptr, symbolName); } new llvm::GlobalVariable(TheModule, ClassSymbol->getType(), true, llvm::GlobalValue::WeakAnyLinkage, ClassSymbol, symbolRef); @@ -908,10 +886,11 @@ static std::string SymbolNameForMethod(const StringRef &ClassName, } CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, - unsigned protocolClassVersion) + unsigned protocolClassVersion) : CGObjCRuntime(cgm), TheModule(CGM.getModule()), - VMContext(cgm.getLLVMContext()), ClassPtrAlias(0), MetaClassPtrAlias(0), - RuntimeVersion(runtimeABIVersion), ProtocolVersion(protocolClassVersion) { + VMContext(cgm.getLLVMContext()), ClassPtrAlias(nullptr), + MetaClassPtrAlias(nullptr), RuntimeVersion(runtimeABIVersion), + ProtocolVersion(protocolClassVersion) { msgSendMDKind = VMContext.getMDKindID("GNUObjCMessageSend"); @@ -948,7 +927,7 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, Int64Ty = llvm::Type::getInt64Ty(VMContext); IntPtrTy = - 
TheModule.getPointerSize() == llvm::Module::Pointer32 ? Int32Ty : Int64Ty; + CGM.getDataLayout().getPointerSizeInBits() == 32 ? Int32Ty : Int64Ty; // Object type QualType UnqualIdTy = CGM.getContext().getObjCIdType(); @@ -961,35 +940,35 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, } PtrToIdTy = llvm::PointerType::getUnqual(IdTy); - ObjCSuperTy = llvm::StructType::get(IdTy, IdTy, NULL); + ObjCSuperTy = llvm::StructType::get(IdTy, IdTy, nullptr); PtrToObjCSuperTy = llvm::PointerType::getUnqual(ObjCSuperTy); llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); // void objc_exception_throw(id); - ExceptionThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy, NULL); - ExceptionReThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy, NULL); + ExceptionThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy, nullptr); + ExceptionReThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy, nullptr); // int objc_sync_enter(id); - SyncEnterFn.init(&CGM, "objc_sync_enter", IntTy, IdTy, NULL); + SyncEnterFn.init(&CGM, "objc_sync_enter", IntTy, IdTy, nullptr); // int objc_sync_exit(id); - SyncExitFn.init(&CGM, "objc_sync_exit", IntTy, IdTy, NULL); + SyncExitFn.init(&CGM, "objc_sync_exit", IntTy, IdTy, nullptr); // void objc_enumerationMutation (id) EnumerationMutationFn.init(&CGM, "objc_enumerationMutation", VoidTy, - IdTy, NULL); + IdTy, nullptr); // id objc_getProperty(id, SEL, ptrdiff_t, BOOL) GetPropertyFn.init(&CGM, "objc_getProperty", IdTy, IdTy, SelectorTy, - PtrDiffTy, BoolTy, NULL); + PtrDiffTy, BoolTy, nullptr); // void objc_setProperty(id, SEL, ptrdiff_t, id, BOOL, BOOL) SetPropertyFn.init(&CGM, "objc_setProperty", VoidTy, IdTy, SelectorTy, - PtrDiffTy, IdTy, BoolTy, BoolTy, NULL); + PtrDiffTy, IdTy, BoolTy, BoolTy, nullptr); // void objc_setPropertyStruct(void*, void*, ptrdiff_t, BOOL, BOOL) GetStructPropertyFn.init(&CGM, "objc_getPropertyStruct", VoidTy, PtrTy, PtrTy, - PtrDiffTy, BoolTy, BoolTy, NULL); + PtrDiffTy, BoolTy, BoolTy, nullptr); // void objc_setPropertyStruct(void*, void*, ptrdiff_t, BOOL, BOOL) SetStructPropertyFn.init(&CGM, "objc_setPropertyStruct", VoidTy, PtrTy, PtrTy, - PtrDiffTy, BoolTy, BoolTy, NULL); + PtrDiffTy, BoolTy, BoolTy, nullptr); // IMP type llvm::Type *IMPArgs[] = { IdTy, SelectorTy }; @@ -1014,20 +993,20 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, // id objc_assign_ivar(id, id, ptrdiff_t); IvarAssignFn.init(&CGM, "objc_assign_ivar", IdTy, IdTy, IdTy, PtrDiffTy, - NULL); + nullptr); // id objc_assign_strongCast (id, id*) StrongCastAssignFn.init(&CGM, "objc_assign_strongCast", IdTy, IdTy, - PtrToIdTy, NULL); + PtrToIdTy, nullptr); // id objc_assign_global(id, id*); GlobalAssignFn.init(&CGM, "objc_assign_global", IdTy, IdTy, PtrToIdTy, - NULL); + nullptr); // id objc_assign_weak(id, id*); - WeakAssignFn.init(&CGM, "objc_assign_weak", IdTy, IdTy, PtrToIdTy, NULL); + WeakAssignFn.init(&CGM, "objc_assign_weak", IdTy, IdTy, PtrToIdTy, nullptr); // id objc_read_weak(id*); - WeakReadFn.init(&CGM, "objc_read_weak", IdTy, PtrToIdTy, NULL); + WeakReadFn.init(&CGM, "objc_read_weak", IdTy, PtrToIdTy, nullptr); // void *objc_memmove_collectable(void*, void *, size_t); MemMoveFn.init(&CGM, "objc_memmove_collectable", PtrTy, PtrTy, PtrTy, - SizeTy, NULL); + SizeTy, nullptr); } } @@ -1066,8 +1045,7 @@ llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel, const std::string &TypeEncoding, bool lval) { SmallVectorImpl<TypedSelector> &Types = SelectorTable[Sel]; - llvm::GlobalAlias *SelValue = 0; - + 
llvm::GlobalAlias *SelValue = nullptr; for (SmallVectorImpl<TypedSelector>::iterator i = Types.begin(), e = Types.end() ; i!=e ; i++) { @@ -1076,11 +1054,10 @@ llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel, break; } } - if (0 == SelValue) { - SelValue = new llvm::GlobalAlias(SelectorTy, - llvm::GlobalValue::PrivateLinkage, - ".objc_selector_"+Sel.getAsString(), NULL, - &TheModule); + if (!SelValue) { + SelValue = llvm::GlobalAlias::create( + SelectorTy->getElementType(), 0, llvm::GlobalValue::PrivateLinkage, + ".objc_selector_" + Sel.getAsString(), &TheModule); Types.push_back(TypedSelector(TypeEncoding, SelValue)); } @@ -1113,7 +1090,7 @@ llvm::Constant *CGObjCGNU::GetEHType(QualType T) { if (CGM.getLangOpts().ObjCRuntime.isNonFragile()) { return MakeConstantString("@id"); } else { - return 0; + return nullptr; } } @@ -1142,7 +1119,7 @@ llvm::Constant *CGObjCGNUstep::GetEHType(QualType T) { new llvm::GlobalVariable(CGM.getModule(), PtrToInt8Ty, false, llvm::GlobalValue::ExternalLinkage, - 0, "__objc_id_type_info"); + nullptr, "__objc_id_type_info"); return llvm::ConstantExpr::getBitCast(IDEHType, PtrToInt8Ty); } @@ -1169,7 +1146,8 @@ llvm::Constant *CGObjCGNUstep::GetEHType(QualType T) { llvm::Constant *Vtable = TheModule.getGlobalVariable(vtableName); if (!Vtable) { Vtable = new llvm::GlobalVariable(TheModule, PtrToInt8Ty, true, - llvm::GlobalValue::ExternalLinkage, 0, vtableName); + llvm::GlobalValue::ExternalLinkage, + nullptr, vtableName); } llvm::Constant *Two = llvm::ConstantInt::get(IntTy, 2); Vtable = llvm::ConstantExpr::getGetElementPtr(Vtable, Two); @@ -1183,7 +1161,7 @@ llvm::Constant *CGObjCGNUstep::GetEHType(QualType T) { fields.push_back(typeName); llvm::Constant *TI = MakeGlobal(llvm::StructType::get(PtrToInt8Ty, PtrToInt8Ty, - NULL), fields, "__objc_eh_typeinfo_" + className, + nullptr), fields, "__objc_eh_typeinfo_" + className, llvm::GlobalValue::LinkOnceODRLinkage); return llvm::ConstantExpr::getBitCast(TI, PtrToInt8Ty); } @@ -1209,7 +1187,7 @@ llvm::Constant *CGObjCGNU::GenerateConstantString(const StringLiteral *SL) { if (!isa) isa = new llvm::GlobalVariable(TheModule, IdTy, /* isConstant */false, - llvm::GlobalValue::ExternalWeakLinkage, 0, Sym); + llvm::GlobalValue::ExternalWeakLinkage, nullptr, Sym); else if (isa->getType() != PtrToIdTy) isa = llvm::ConstantExpr::getBitCast(isa, PtrToIdTy); @@ -1218,7 +1196,7 @@ llvm::Constant *CGObjCGNU::GenerateConstantString(const StringLiteral *SL) { Ivars.push_back(MakeConstantString(Str)); Ivars.push_back(llvm::ConstantInt::get(IntTy, Str.size())); llvm::Constant *ObjCStr = MakeGlobal( - llvm::StructType::get(PtrToIdTy, PtrToInt8Ty, IntTy, NULL), + llvm::StructType::get(PtrToIdTy, PtrToInt8Ty, IntTy, nullptr), Ivars, ".objc_str"); ObjCStr = llvm::ConstantExpr::getBitCast(ObjCStr, PtrToInt8Ty); ObjCStrings[Str] = ObjCStr; @@ -1247,7 +1225,7 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, CGM.getTypes().ConvertType(ResultType))); } if (Sel == ReleaseSel) { - return RValue::get(0); + return RValue::get(nullptr); } } @@ -1262,9 +1240,9 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, MessageSendInfo MSI = getMessageSendInfo(Method, ResultType, ActualArgs); - llvm::Value *ReceiverClass = 0; + llvm::Value *ReceiverClass = nullptr; if (isCategoryImpl) { - llvm::Constant *classLookupFunction = 0; + llvm::Constant *classLookupFunction = nullptr; if (IsClassMessage) { classLookupFunction = CGM.CreateRuntimeFunction(llvm::FunctionType::get( IdTy, PtrTy, true), "objc_get_meta_class"); @@ 
-1282,16 +1260,16 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, // super_class pointer from either the class or metaclass structure. if (IsClassMessage) { if (!MetaClassPtrAlias) { - MetaClassPtrAlias = new llvm::GlobalAlias(IdTy, - llvm::GlobalValue::InternalLinkage, ".objc_metaclass_ref" + - Class->getNameAsString(), NULL, &TheModule); + MetaClassPtrAlias = llvm::GlobalAlias::create( + IdTy->getElementType(), 0, llvm::GlobalValue::InternalLinkage, + ".objc_metaclass_ref" + Class->getNameAsString(), &TheModule); } ReceiverClass = MetaClassPtrAlias; } else { if (!ClassPtrAlias) { - ClassPtrAlias = new llvm::GlobalAlias(IdTy, - llvm::GlobalValue::InternalLinkage, ".objc_class_ref" + - Class->getNameAsString(), NULL, &TheModule); + ClassPtrAlias = llvm::GlobalAlias::create( + IdTy->getElementType(), 0, llvm::GlobalValue::InternalLinkage, + ".objc_class_ref" + Class->getNameAsString(), &TheModule); } ReceiverClass = ClassPtrAlias; } @@ -1299,14 +1277,14 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, // Cast the pointer to a simplified version of the class structure ReceiverClass = Builder.CreateBitCast(ReceiverClass, llvm::PointerType::getUnqual( - llvm::StructType::get(IdTy, IdTy, NULL))); + llvm::StructType::get(IdTy, IdTy, nullptr))); // Get the superclass pointer ReceiverClass = Builder.CreateStructGEP(ReceiverClass, 1); // Load the superclass pointer ReceiverClass = Builder.CreateLoad(ReceiverClass); // Construct the structure used to look up the IMP llvm::StructType *ObjCSuperTy = llvm::StructType::get( - Receiver->getType(), IdTy, NULL); + Receiver->getType(), IdTy, nullptr); llvm::Value *ObjCSuper = Builder.CreateAlloca(ObjCSuperTy); Builder.CreateStore(Receiver, Builder.CreateStructGEP(ObjCSuper, 0)); @@ -1326,7 +1304,8 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, llvm::MDNode *node = llvm::MDNode::get(VMContext, impMD); llvm::Instruction *call; - RValue msgRet = CGF.EmitCall(MSI.CallInfo, imp, Return, ActualArgs, 0, &call); + RValue msgRet = CGF.EmitCall(MSI.CallInfo, imp, Return, ActualArgs, nullptr, + &call); call->setMetadata(msgSendMDKind, node); return msgRet; } @@ -1350,7 +1329,7 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF, CGM.getTypes().ConvertType(ResultType))); } if (Sel == ReleaseSel) { - return RValue::get(0); + return RValue::get(nullptr); } } @@ -1368,9 +1347,9 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF, bool isPointerSizedReturn = (ResultType->isAnyPointerType() || ResultType->isIntegralOrEnumerationType() || ResultType->isVoidType()); - llvm::BasicBlock *startBB = 0; - llvm::BasicBlock *messageBB = 0; - llvm::BasicBlock *continueBB = 0; + llvm::BasicBlock *startBB = nullptr; + llvm::BasicBlock *messageBB = nullptr; + llvm::BasicBlock *continueBB = nullptr; if (!isPointerSizedReturn) { startBB = Builder.GetInsertBlock(); @@ -1395,7 +1374,8 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF, llvm::Value *impMD[] = { llvm::MDString::get(VMContext, Sel.getAsString()), llvm::MDString::get(VMContext, Class ? 
Class->getNameAsString() :""), - llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), Class!=0) + llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), + Class!=nullptr) }; llvm::MDNode *node = llvm::MDNode::get(VMContext, impMD); @@ -1438,7 +1418,8 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF, imp = EnforceType(Builder, imp, MSI.MessengerType); llvm::Instruction *call; - RValue msgRet = CGF.EmitCall(MSI.CallInfo, imp, Return, ActualArgs, 0, &call); + RValue msgRet = CGF.EmitCall(MSI.CallInfo, imp, Return, ActualArgs, nullptr, + &call); call->setMetadata(msgSendMDKind, node); @@ -1494,7 +1475,7 @@ GenerateMethodList(const StringRef &ClassName, PtrToInt8Ty, // Really a selector, but the runtime creates it us. PtrToInt8Ty, // Method types IMPTy, //Method pointer - NULL); + nullptr); std::vector<llvm::Constant*> Methods; std::vector<llvm::Constant*> Elements; for (unsigned int i = 0, e = MethodTypes.size(); i < e; ++i) { @@ -1526,7 +1507,7 @@ GenerateMethodList(const StringRef &ClassName, NextPtrTy, IntTy, ObjCMethodArrayTy, - NULL); + nullptr); Methods.clear(); Methods.push_back(llvm::ConstantPointerNull::get( @@ -1550,7 +1531,7 @@ GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames, PtrToInt8Ty, PtrToInt8Ty, IntTy, - NULL); + nullptr); std::vector<llvm::Constant*> Ivars; std::vector<llvm::Constant*> Elements; for (unsigned int i = 0, e = IvarNames.size() ; i < e ; i++) { @@ -1572,7 +1553,7 @@ GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames, // Structure containing array and array count llvm::StructType *ObjCIvarListTy = llvm::StructType::get(IntTy, ObjCIvarArrayTy, - NULL); + nullptr); // Create an instance of the structure return MakeGlobal(ObjCIvarListTy, Elements, ".objc_ivar_list"); @@ -1622,7 +1603,7 @@ llvm::Constant *CGObjCGNU::GenerateClassStructure( Properties->getType(), // properties IntPtrTy, // strong_pointers IntPtrTy, // weak_pointers - NULL); + nullptr); llvm::Constant *Zero = llvm::ConstantInt::get(LongTy, 0); // Fill in the structure std::vector<llvm::Constant*> Elements; @@ -1676,7 +1657,7 @@ GenerateProtocolMethodList(ArrayRef<llvm::Constant *> MethodNames, llvm::StructType *ObjCMethodDescTy = llvm::StructType::get( PtrToInt8Ty, // Really a selector, but the runtime does the casting for us. PtrToInt8Ty, - NULL); + nullptr); std::vector<llvm::Constant*> Methods; std::vector<llvm::Constant*> Elements; for (unsigned int i = 0, e = MethodTypes.size() ; i < e ; i++) { @@ -1690,7 +1671,7 @@ GenerateProtocolMethodList(ArrayRef<llvm::Constant *> MethodNames, llvm::Constant *Array = llvm::ConstantArray::get(ObjCMethodArrayTy, Methods); llvm::StructType *ObjCMethodDescListTy = llvm::StructType::get( - IntTy, ObjCMethodArrayTy, NULL); + IntTy, ObjCMethodArrayTy, nullptr); Methods.clear(); Methods.push_back(llvm::ConstantInt::get(IntTy, MethodNames.size())); Methods.push_back(Array); @@ -1705,11 +1686,11 @@ llvm::Constant *CGObjCGNU::GenerateProtocolList(ArrayRef<std::string>Protocols){ PtrTy, //Should be a recurisve pointer, but it's always NULL here. 
SizeTy, ProtocolArrayTy, - NULL); + nullptr); std::vector<llvm::Constant*> Elements; for (const std::string *iter = Protocols.begin(), *endIter = Protocols.end(); iter != endIter ; iter++) { - llvm::Constant *protocol = 0; + llvm::Constant *protocol = nullptr; llvm::StringMap<llvm::Constant*>::iterator value = ExistingProtocols.find(*iter); if (value == ExistingProtocols.end()) { @@ -1755,7 +1736,7 @@ llvm::Constant *CGObjCGNU::GenerateEmptyProtocol( MethodList->getType(), MethodList->getType(), MethodList->getType(), - NULL); + nullptr); std::vector<llvm::Constant*> Elements; // The isa pointer must be set to a magic number so the runtime knows it's // the correct layout. @@ -1779,24 +1760,22 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { PD = Def; SmallVector<std::string, 16> Protocols; - for (ObjCProtocolDecl::protocol_iterator PI = PD->protocol_begin(), - E = PD->protocol_end(); PI != E; ++PI) - Protocols.push_back((*PI)->getNameAsString()); + for (const auto *PI : PD->protocols()) + Protocols.push_back(PI->getNameAsString()); SmallVector<llvm::Constant*, 16> InstanceMethodNames; SmallVector<llvm::Constant*, 16> InstanceMethodTypes; SmallVector<llvm::Constant*, 16> OptionalInstanceMethodNames; SmallVector<llvm::Constant*, 16> OptionalInstanceMethodTypes; - for (ObjCProtocolDecl::instmeth_iterator iter = PD->instmeth_begin(), - E = PD->instmeth_end(); iter != E; iter++) { + for (const auto *I : PD->instance_methods()) { std::string TypeStr; - Context.getObjCEncodingForMethodDecl(*iter, TypeStr); - if ((*iter)->getImplementationControl() == ObjCMethodDecl::Optional) { + Context.getObjCEncodingForMethodDecl(I, TypeStr); + if (I->getImplementationControl() == ObjCMethodDecl::Optional) { OptionalInstanceMethodNames.push_back( - MakeConstantString((*iter)->getSelector().getAsString())); + MakeConstantString(I->getSelector().getAsString())); OptionalInstanceMethodTypes.push_back(MakeConstantString(TypeStr)); } else { InstanceMethodNames.push_back( - MakeConstantString((*iter)->getSelector().getAsString())); + MakeConstantString(I->getSelector().getAsString())); InstanceMethodTypes.push_back(MakeConstantString(TypeStr)); } } @@ -1805,18 +1784,16 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { SmallVector<llvm::Constant*, 16> ClassMethodTypes; SmallVector<llvm::Constant*, 16> OptionalClassMethodNames; SmallVector<llvm::Constant*, 16> OptionalClassMethodTypes; - for (ObjCProtocolDecl::classmeth_iterator - iter = PD->classmeth_begin(), endIter = PD->classmeth_end(); - iter != endIter ; iter++) { + for (const auto *I : PD->class_methods()) { std::string TypeStr; - Context.getObjCEncodingForMethodDecl((*iter),TypeStr); - if ((*iter)->getImplementationControl() == ObjCMethodDecl::Optional) { + Context.getObjCEncodingForMethodDecl(I,TypeStr); + if (I->getImplementationControl() == ObjCMethodDecl::Optional) { OptionalClassMethodNames.push_back( - MakeConstantString((*iter)->getSelector().getAsString())); + MakeConstantString(I->getSelector().getAsString())); OptionalClassMethodTypes.push_back(MakeConstantString(TypeStr)); } else { ClassMethodNames.push_back( - MakeConstantString((*iter)->getSelector().getAsString())); + MakeConstantString(I->getSelector().getAsString())); ClassMethodTypes.push_back(MakeConstantString(TypeStr)); } } @@ -1840,19 +1817,16 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { // structures for protocol metadata everywhere. 
llvm::StructType *PropertyMetadataTy = llvm::StructType::get( PtrToInt8Ty, Int8Ty, Int8Ty, Int8Ty, Int8Ty, PtrToInt8Ty, - PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty, NULL); + PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty, nullptr); std::vector<llvm::Constant*> Properties; std::vector<llvm::Constant*> OptionalProperties; // Add all of the property methods need adding to the method list and to the // property metadata list. - for (ObjCContainerDecl::prop_iterator - iter = PD->prop_begin(), endIter = PD->prop_end(); - iter != endIter ; iter++) { + for (auto *property : PD->properties()) { std::vector<llvm::Constant*> Fields; - ObjCPropertyDecl *property = *iter; - Fields.push_back(MakePropertyEncodingString(property, 0)); + Fields.push_back(MakePropertyEncodingString(property, nullptr)); PushPropertyAttributes(Fields, property); if (ObjCMethodDecl *getter = property->getGetterMethodDecl()) { @@ -1919,7 +1893,7 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { OptionalClassMethodList->getType(), PropertyList->getType(), OptionalPropertyList->getType(), - NULL); + nullptr); std::vector<llvm::Constant*> Elements; // The isa pointer must be set to a magic number so the runtime knows it's // the correct layout. @@ -1960,7 +1934,7 @@ void CGObjCGNU::GenerateProtocolHolderCategory() { PtrTy, //Should be a recurisve pointer, but it's always NULL here. SizeTy, ProtocolArrayTy, - NULL); + nullptr); std::vector<llvm::Constant*> ProtocolElements; for (llvm::StringMapIterator<llvm::Constant*> iter = ExistingProtocols.begin(), endIter = ExistingProtocols.end(); @@ -1980,7 +1954,7 @@ void CGObjCGNU::GenerateProtocolHolderCategory() { ProtocolElements, ".objc_protocol_list"), PtrTy)); Categories.push_back(llvm::ConstantExpr::getBitCast( MakeGlobal(llvm::StructType::get(PtrToInt8Ty, PtrToInt8Ty, - PtrTy, PtrTy, PtrTy, NULL), Elements), PtrTy)); + PtrTy, PtrTy, PtrTy, nullptr), Elements), PtrTy)); } /// Libobjc2 uses a bitfield representation where small(ish) bitfields are @@ -1996,8 +1970,7 @@ void CGObjCGNU::GenerateProtocolHolderCategory() { /// bitfield / with the 63rd bit set will be 1<<64. llvm::Constant *CGObjCGNU::MakeBitField(ArrayRef<bool> bits) { int bitCount = bits.size(); - int ptrBits = - (TheModule.getPointerSize() == llvm::Module::Pointer32) ? 
32 : 64; + int ptrBits = CGM.getDataLayout().getPointerSizeInBits(); if (bitCount < ptrBits) { uint64_t val = 1; for (int i=0 ; i<bitCount ; ++i) { @@ -2021,7 +1994,7 @@ llvm::Constant *CGObjCGNU::MakeBitField(ArrayRef<bool> bits) { llvm::ConstantInt::get(Int32Ty, values.size()), array }; llvm::Constant *GS = MakeGlobal(llvm::StructType::get(Int32Ty, arrayTy, - NULL), fields); + nullptr), fields); llvm::Constant *ptr = llvm::ConstantExpr::getPtrToInt(GS, IntPtrTy); return ptr; } @@ -2032,24 +2005,20 @@ void CGObjCGNU::GenerateCategory(const ObjCCategoryImplDecl *OCD) { // Collect information about instance methods SmallVector<Selector, 16> InstanceMethodSels; SmallVector<llvm::Constant*, 16> InstanceMethodTypes; - for (ObjCCategoryImplDecl::instmeth_iterator - iter = OCD->instmeth_begin(), endIter = OCD->instmeth_end(); - iter != endIter ; iter++) { - InstanceMethodSels.push_back((*iter)->getSelector()); + for (const auto *I : OCD->instance_methods()) { + InstanceMethodSels.push_back(I->getSelector()); std::string TypeStr; - CGM.getContext().getObjCEncodingForMethodDecl(*iter,TypeStr); + CGM.getContext().getObjCEncodingForMethodDecl(I,TypeStr); InstanceMethodTypes.push_back(MakeConstantString(TypeStr)); } // Collect information about class methods SmallVector<Selector, 16> ClassMethodSels; SmallVector<llvm::Constant*, 16> ClassMethodTypes; - for (ObjCCategoryImplDecl::classmeth_iterator - iter = OCD->classmeth_begin(), endIter = OCD->classmeth_end(); - iter != endIter ; iter++) { - ClassMethodSels.push_back((*iter)->getSelector()); + for (const auto *I : OCD->class_methods()) { + ClassMethodSels.push_back(I->getSelector()); std::string TypeStr; - CGM.getContext().getObjCEncodingForMethodDecl(*iter,TypeStr); + CGM.getContext().getObjCEncodingForMethodDecl(I,TypeStr); ClassMethodTypes.push_back(MakeConstantString(TypeStr)); } @@ -2077,7 +2046,7 @@ void CGObjCGNU::GenerateCategory(const ObjCCategoryImplDecl *OCD) { GenerateProtocolList(Protocols), PtrTy)); Categories.push_back(llvm::ConstantExpr::getBitCast( MakeGlobal(llvm::StructType::get(PtrToInt8Ty, PtrToInt8Ty, - PtrTy, PtrTy, PtrTy, NULL), Elements), PtrTy)); + PtrTy, PtrTy, PtrTy, nullptr), Elements), PtrTy)); } llvm::Constant *CGObjCGNU::GeneratePropertyList(const ObjCImplementationDecl *OID, @@ -2088,17 +2057,14 @@ llvm::Constant *CGObjCGNU::GeneratePropertyList(const ObjCImplementationDecl *OI // setter name, setter types, getter name, getter types. llvm::StructType *PropertyMetadataTy = llvm::StructType::get( PtrToInt8Ty, Int8Ty, Int8Ty, Int8Ty, Int8Ty, PtrToInt8Ty, - PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty, NULL); + PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty, nullptr); std::vector<llvm::Constant*> Properties; // Add all of the property methods need adding to the method list and to the // property metadata list. 
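MakeBitField now asks the DataLayout for the pointer width instead of the removed llvm::Module::getPointerSize() enum. A hedged sketch of the inline case only (hypothetical helper; the real function returns an llvm::Constant and falls back to an out-of-line array for bitmaps wider than a pointer):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/DataLayout.h"
    #include <cassert>
    #include <cstdint>

    static uint64_t encodeInlineBitmap(llvm::ArrayRef<bool> bits,
                                       const llvm::DataLayout &DL) {
      unsigned ptrBits = DL.getPointerSizeInBits(0);  // address space 0
      assert(bits.size() < ptrBits && "wide bitmaps use the array form");
      uint64_t val = 1;                  // starts at 1, mirroring the code above
      for (unsigned i = 0, e = bits.size(); i != e; ++i)
        if (bits[i])
          val |= 1ULL << (i + 1);        // input bit i lands at bit i+1
      return val;
    }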
- for (ObjCImplDecl::propimpl_iterator - iter = OID->propimpl_begin(), endIter = OID->propimpl_end(); - iter != endIter ; iter++) { + for (auto *propertyImpl : OID->property_impls()) { std::vector<llvm::Constant*> Fields; - ObjCPropertyDecl *property = iter->getPropertyDecl(); - ObjCPropertyImplDecl *propertyImpl = *iter; + ObjCPropertyDecl *property = propertyImpl->getPropertyDecl(); bool isSynthesized = (propertyImpl->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize); bool isDynamic = (propertyImpl->getPropertyImplementation() == @@ -2265,12 +2231,10 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { // Collect information about instance methods SmallVector<Selector, 16> InstanceMethodSels; SmallVector<llvm::Constant*, 16> InstanceMethodTypes; - for (ObjCImplementationDecl::instmeth_iterator - iter = OID->instmeth_begin(), endIter = OID->instmeth_end(); - iter != endIter ; iter++) { - InstanceMethodSels.push_back((*iter)->getSelector()); + for (const auto *I : OID->instance_methods()) { + InstanceMethodSels.push_back(I->getSelector()); std::string TypeStr; - Context.getObjCEncodingForMethodDecl((*iter),TypeStr); + Context.getObjCEncodingForMethodDecl(I,TypeStr); InstanceMethodTypes.push_back(MakeConstantString(TypeStr)); } @@ -2281,22 +2245,16 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { // Collect information about class methods SmallVector<Selector, 16> ClassMethodSels; SmallVector<llvm::Constant*, 16> ClassMethodTypes; - for (ObjCImplementationDecl::classmeth_iterator - iter = OID->classmeth_begin(), endIter = OID->classmeth_end(); - iter != endIter ; iter++) { - ClassMethodSels.push_back((*iter)->getSelector()); + for (const auto *I : OID->class_methods()) { + ClassMethodSels.push_back(I->getSelector()); std::string TypeStr; - Context.getObjCEncodingForMethodDecl((*iter),TypeStr); + Context.getObjCEncodingForMethodDecl(I,TypeStr); ClassMethodTypes.push_back(MakeConstantString(TypeStr)); } // Collect the names of referenced protocols SmallVector<std::string, 16> Protocols; - for (ObjCInterfaceDecl::protocol_iterator - I = ClassDecl->protocol_begin(), - E = ClassDecl->protocol_end(); I != E; ++I) - Protocols.push_back((*I)->getNameAsString()); - - + for (const auto *I : ClassDecl->protocols()) + Protocols.push_back(I->getNameAsString()); // Get the superclass pointer. 
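The same iterator-to-range rewrite recurs throughout this file; in sketch form, assuming the Clang AST range accessors the patch itself uses (instance_methods(), property_impls(), and so on):

    #include "clang/AST/DeclObjC.h"
    #include "llvm/ADT/SmallVector.h"

    static void collectInstanceSelectors(const clang::ObjCImplementationDecl *OID,
                                         llvm::SmallVectorImpl<clang::Selector> &Sels) {
      // Replaces the old instmeth_begin()/instmeth_end() iterator pair.
      for (const auto *M : OID->instance_methods())
        Sels.push_back(M->getSelector());
    }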
llvm::Constant *SuperClass; @@ -2326,7 +2284,7 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { // the offset (third field in ivar structure) llvm::Type *IndexTy = Int32Ty; llvm::Constant *offsetPointerIndexes[] = {Zeros[0], - llvm::ConstantInt::get(IndexTy, 1), 0, + llvm::ConstantInt::get(IndexTy, 1), nullptr, llvm::ConstantInt::get(IndexTy, 2) }; unsigned ivarIndex = 0; @@ -2357,14 +2315,14 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { llvm::Constant *ZeroPtr = llvm::ConstantInt::get(IntPtrTy, 0); //Generate metaclass for class methods llvm::Constant *MetaClassStruct = GenerateClassStructure(NULLPtr, - NULLPtr, 0x12L, ClassName.c_str(), 0, Zeros[0], GenerateIvarList( + NULLPtr, 0x12L, ClassName.c_str(), nullptr, Zeros[0], GenerateIvarList( empty, empty, empty), ClassMethodList, NULLPtr, NULLPtr, NULLPtr, ZeroPtr, ZeroPtr, true); // Generate the class structure llvm::Constant *ClassStruct = GenerateClassStructure(MetaClassStruct, SuperClass, 0x11L, - ClassName.c_str(), 0, + ClassName.c_str(), nullptr, llvm::ConstantInt::get(LongTy, instanceSize), IvarList, MethodList, GenerateProtocolList(Protocols), IvarOffsetArray, Properties, StrongIvarBitmap, WeakIvarBitmap); @@ -2374,13 +2332,13 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { ClassPtrAlias->replaceAllUsesWith( llvm::ConstantExpr::getBitCast(ClassStruct, IdTy)); ClassPtrAlias->eraseFromParent(); - ClassPtrAlias = 0; + ClassPtrAlias = nullptr; } if (MetaClassPtrAlias) { MetaClassPtrAlias->replaceAllUsesWith( llvm::ConstantExpr::getBitCast(MetaClassStruct, IdTy)); MetaClassPtrAlias->eraseFromParent(); - MetaClassPtrAlias = 0; + MetaClassPtrAlias = nullptr; } // Add class structure to list to be added to the symtab later @@ -2393,7 +2351,7 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() { // Only emit an ObjC load function if no Objective-C stuff has been called if (Classes.empty() && Categories.empty() && ConstantStrings.empty() && ExistingProtocols.empty() && SelectorTable.empty()) - return NULL; + return nullptr; // Add all referenced protocols to a category. GenerateProtocolHolderCategory(); @@ -2401,8 +2359,8 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() { llvm::StructType *SelStructTy = dyn_cast<llvm::StructType>( SelectorTy->getElementType()); llvm::Type *SelStructPtrTy = SelectorTy; - if (SelStructTy == 0) { - SelStructTy = llvm::StructType::get(PtrToInt8Ty, PtrToInt8Ty, NULL); + if (!SelStructTy) { + SelStructTy = llvm::StructType::get(PtrToInt8Ty, PtrToInt8Ty, nullptr); SelStructPtrTy = llvm::PointerType::getUnqual(SelStructTy); } @@ -2423,7 +2381,7 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() { Elements.push_back(llvm::ConstantArray::get(StaticsArrayTy, ConstantStrings)); llvm::StructType *StaticsListTy = - llvm::StructType::get(PtrToInt8Ty, StaticsArrayTy, NULL); + llvm::StructType::get(PtrToInt8Ty, StaticsArrayTy, nullptr); llvm::Type *StaticsListPtrTy = llvm::PointerType::getUnqual(StaticsListTy); Statics = MakeGlobal(StaticsListTy, Elements, ".objc_statics"); @@ -2441,7 +2399,7 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() { llvm::StructType *SymTabTy = llvm::StructType::get(LongTy, SelStructPtrTy, llvm::Type::getInt16Ty(VMContext), llvm::Type::getInt16Ty(VMContext), - ClassListTy, NULL); + ClassListTy, nullptr); Elements.clear(); // Pointer to an array of selectors used in this module. 
@@ -2522,7 +2480,7 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() { // constants llvm::StructType * ModuleTy = llvm::StructType::get(LongTy, LongTy, PtrToInt8Ty, llvm::PointerType::getUnqual(SymTabTy), - (RuntimeVersion >= 10) ? IntTy : NULL, NULL); + (RuntimeVersion >= 10) ? IntTy : nullptr, nullptr); Elements.clear(); // Runtime version, used for ABI compatibility checking. Elements.push_back(llvm::ConstantInt::get(LongTy, RuntimeVersion)); @@ -2595,7 +2553,7 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() { llvm::Constant::getNullValue(RegisterAlias->getType())); Builder.CreateCondBr(HasRegisterAlias, AliasBB, NoAliasBB); - // The true branch (has alias registration fucntion): + // The true branch (has alias registration function): Builder.SetInsertPoint(AliasBB); // Emit alias registration calls: for (std::vector<ClassAliasPair>::iterator iter = ClassAliases.begin(); @@ -2603,7 +2561,7 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() { llvm::Constant *TheClass = TheModule.getGlobalVariable(("_OBJC_CLASS_" + iter->first).c_str(), true); - if (0 != TheClass) { + if (TheClass) { TheClass = llvm::ConstantExpr::getBitCast(TheClass, PtrTy); Builder.CreateCall2(RegisterAlias, TheClass, MakeConstantString(iter->second)); @@ -2653,7 +2611,7 @@ llvm::Constant *CGObjCGNU::GetPropertySetFunction() { llvm::Constant *CGObjCGNU::GetOptimizedPropertySetFunction(bool atomic, bool copy) { - return 0; + return nullptr; } llvm::Constant *CGObjCGNU::GetGetStructFunction() { @@ -2663,10 +2621,10 @@ llvm::Constant *CGObjCGNU::GetSetStructFunction() { return SetStructPropertyFn; } llvm::Constant *CGObjCGNU::GetCppAtomicObjectGetFunction() { - return 0; + return nullptr; } llvm::Constant *CGObjCGNU::GetCppAtomicObjectSetFunction() { - return 0; + return nullptr; } llvm::Constant *CGObjCGNU::EnumerationMutationFunction() { @@ -2814,7 +2772,7 @@ llvm::GlobalVariable *CGObjCGNU::ObjCIvarOffsetVariable( } else { IvarOffsetPointer = new llvm::GlobalVariable(TheModule, llvm::Type::getInt32PtrTy(VMContext), false, - llvm::GlobalValue::ExternalLinkage, 0, Name); + llvm::GlobalValue::ExternalLinkage, nullptr, Name); } } return IvarOffsetPointer; @@ -2844,7 +2802,7 @@ static const ObjCInterfaceDecl *FindIvarInterface(ASTContext &Context, if (const ObjCInterfaceDecl *Super = OID->getSuperClass()) return FindIvarInterface(Context, Super, OIVD); - return 0; + return nullptr; } llvm::Value *CGObjCGNU::EmitIvarOffset(CodeGenFunction &CGF, diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp index 2b2a5b837608..6f0979d06c53 100644 --- a/lib/CodeGen/CGObjCMac.cpp +++ b/lib/CodeGen/CGObjCMac.cpp @@ -29,12 +29,12 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/raw_ostream.h" #include <cstdio> @@ -174,6 +174,7 @@ protected: public: llvm::Type *ShortTy, *IntTy, *LongTy, *LongLongTy; llvm::Type *Int8PtrTy, *Int8PtrPtrTy; + llvm::Type *IvarOffsetVarTy; /// ObjectPtrTy - LLVM type for object handles (typeof(id)) llvm::Type *ObjectPtrTy; @@ -243,8 +244,8 @@ public: Params.push_back(Ctx.getPointerDiffType()->getCanonicalTypeUnqualified()); Params.push_back(Ctx.BoolTy); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(IdType, Params, - FunctionType::ExtInfo(), + 
Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(IdType, false, Params, + FunctionType::ExtInfo(), RequiredArgs::All)); return CGM.CreateRuntimeFunction(FTy, "objc_getProperty"); } @@ -263,8 +264,9 @@ public: Params.push_back(Ctx.BoolTy); Params.push_back(Ctx.BoolTy); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, Params, - FunctionType::ExtInfo(), + Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, false, + Params, + FunctionType::ExtInfo(), RequiredArgs::All)); return CGM.CreateRuntimeFunction(FTy, "objc_setProperty"); } @@ -289,7 +291,8 @@ public: Params.push_back(IdType); Params.push_back(Ctx.getPointerDiffType()->getCanonicalTypeUnqualified()); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, Params, + Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, false, + Params, FunctionType::ExtInfo(), RequiredArgs::All)); const char *name; @@ -316,8 +319,9 @@ public: Params.push_back(Ctx.BoolTy); Params.push_back(Ctx.BoolTy); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, Params, - FunctionType::ExtInfo(), + Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, false, + Params, + FunctionType::ExtInfo(), RequiredArgs::All)); return CGM.CreateRuntimeFunction(FTy, "objc_copyStruct"); } @@ -335,8 +339,9 @@ public: Params.push_back(Ctx.VoidPtrTy); Params.push_back(Ctx.VoidPtrTy); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, Params, - FunctionType::ExtInfo(), + Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, false, + Params, + FunctionType::ExtInfo(), RequiredArgs::All)); return CGM.CreateRuntimeFunction(FTy, "objc_copyCppObjectAtomic"); } @@ -348,8 +353,9 @@ public: SmallVector<CanQualType,1> Params; Params.push_back(Ctx.getCanonicalParamType(Ctx.getObjCIdType())); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, Params, - FunctionType::ExtInfo(), + Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, false, + Params, + FunctionType::ExtInfo(), RequiredArgs::All)); return CGM.CreateRuntimeFunction(FTy, "objc_enumerationMutation"); } @@ -849,7 +855,7 @@ protected: llvm::SetVector<IdentifierInfo*> DefinedSymbols; /// ClassNames - uniqued class names. - llvm::DenseMap<IdentifierInfo*, llvm::GlobalVariable*> ClassNames; + llvm::StringMap<llvm::GlobalVariable*> ClassNames; /// MethodVarNames - uniqued method variable names. llvm::DenseMap<Selector, llvm::GlobalVariable*> MethodVarNames; @@ -885,6 +891,9 @@ protected: /// DefinedClasses - List of defined classes. SmallVector<llvm::GlobalValue*, 16> DefinedClasses; + + /// ImplementedClasses - List of @implemented classes. + SmallVector<const ObjCInterfaceDecl*, 16> ImplementedClasses; /// DefinedNonLazyClasses - List of defined "non-lazy" classes. SmallVector<llvm::GlobalValue*, 16> DefinedNonLazyClasses; @@ -923,8 +932,9 @@ protected: const Decl *Container); /// GetClassName - Return a unique constant for the given selector's - /// name. The return value has type char *. - llvm::Constant *GetClassName(IdentifierInfo *Ident); + /// runtime name (which may change via use of objc_runtime_name attribute on + /// class or protocol definition. The return value has type char *. 
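Above, ClassNames moves from DenseMap<IdentifierInfo*, ...> to llvm::StringMap so GetClassName can unique on the runtime-visible spelling (which, per the new comment, an objc_runtime_name attribute may override) rather than on the declaration's identifier. A small illustration of string-keyed uniquing (hypothetical cache, assuming the current StringMap::insert API):

    #include "llvm/ADT/StringMap.h"
    #include "llvm/ADT/StringRef.h"
    #include <utility>

    class NameCache {
      llvm::StringMap<unsigned> Slots;   // value type is only illustrative
      unsigned NextId = 0;
    public:
      unsigned getOrCreate(llvm::StringRef Name) {
        auto Ins = Slots.insert(std::make_pair(Name, NextId));
        if (Ins.second)                  // first time this spelling was seen
          ++NextId;
        return Ins.first->second;
      }
    };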
+ llvm::Constant *GetClassName(StringRef RuntimeName); llvm::Function *GetMethodDefinition(const ObjCMethodDecl *MD); @@ -991,7 +1001,7 @@ protected: llvm::SmallPtrSet<const IdentifierInfo*, 16> &PropertySet, SmallVectorImpl<llvm::Constant*> &Properties, const Decl *Container, - const ObjCProtocolDecl *PROTO, + const ObjCProtocolDecl *Proto, const ObjCCommonTypesHelper &ObjCTypes); /// GetProtocolRef - Return a reference to the internal protocol @@ -1009,14 +1019,12 @@ protected: /// \param Name - The variable name. /// \param Init - The variable initializer; this is also used to /// define the type of the variable. - /// \param Section - The section the variable should go into, or 0. + /// \param Section - The section the variable should go into, or empty. /// \param Align - The alignment for the variable, or 0. /// \param AddToUsed - Whether the variable should be added to /// "llvm.used". - llvm::GlobalVariable *CreateMetadataVar(Twine Name, - llvm::Constant *Init, - const char *Section, - unsigned Align, + llvm::GlobalVariable *CreateMetadataVar(Twine Name, llvm::Constant *Init, + StringRef Section, unsigned Align, bool AddToUsed); CodeGen::RValue EmitMessageSend(CodeGen::CodeGenFunction &CGF, @@ -1038,12 +1046,12 @@ public: CGObjCCommonMac(CodeGen::CodeGenModule &cgm) : CGObjCRuntime(cgm), VMContext(cgm.getLLVMContext()) { } - virtual llvm::Constant *GenerateConstantString(const StringLiteral *SL); - - virtual llvm::Function *GenerateMethod(const ObjCMethodDecl *OMD, - const ObjCContainerDecl *CD=0); + llvm::Constant *GenerateConstantString(const StringLiteral *SL) override; + + llvm::Function *GenerateMethod(const ObjCMethodDecl *OMD, + const ObjCContainerDecl *CD=nullptr) override; - virtual void GenerateProtocol(const ObjCProtocolDecl *PD); + void GenerateProtocol(const ObjCProtocolDecl *PD) override; /// GetOrEmitProtocol - Get the protocol object for the given /// declaration, emitting it if necessary. The return value has type @@ -1055,13 +1063,13 @@ public: /// forward references will be filled in with empty bodies if no /// definition is seen. The return value has type ProtocolPtrTy. virtual llvm::Constant *GetOrEmitProtocolRef(const ObjCProtocolDecl *PD)=0; - virtual llvm::Constant *BuildGCBlockLayout(CodeGen::CodeGenModule &CGM, - const CGBlockInfo &blockInfo); - virtual llvm::Constant *BuildRCBlockLayout(CodeGen::CodeGenModule &CGM, - const CGBlockInfo &blockInfo); - - virtual llvm::Constant *BuildByrefLayout(CodeGen::CodeGenModule &CGM, - QualType T); + llvm::Constant *BuildGCBlockLayout(CodeGen::CodeGenModule &CGM, + const CGBlockInfo &blockInfo) override; + llvm::Constant *BuildRCBlockLayout(CodeGen::CodeGenModule &CGM, + const CGBlockInfo &blockInfo) override; + + llvm::Constant *BuildByrefLayout(CodeGen::CodeGenModule &CGM, + QualType T) override; }; class CGObjCMac : public CGObjCCommonMac { @@ -1092,9 +1100,9 @@ private: llvm::Value *EmitClassRefFromId(CodeGenFunction &CGF, IdentifierInfo *II); - - llvm::Value *EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF); - + + llvm::Value *EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) override; + /// EmitSuperClassRef - Emits reference to class's main metadata class. llvm::Value *EmitSuperClassRef(const ObjCInterfaceDecl *ID); @@ -1145,13 +1153,13 @@ private: /// GetOrEmitProtocol - Get the protocol object for the given /// declaration, emitting it if necessary. The return value has type /// ProtocolPtrTy. 
- virtual llvm::Constant *GetOrEmitProtocol(const ObjCProtocolDecl *PD); + llvm::Constant *GetOrEmitProtocol(const ObjCProtocolDecl *PD) override; /// GetOrEmitProtocolRef - Get a forward reference to the protocol /// object for the given declaration, emitting it if needed. These /// forward references will be filled in with empty bodies if no /// definition is seen. The return value has type ProtocolPtrTy. - virtual llvm::Constant *GetOrEmitProtocolRef(const ObjCProtocolDecl *PD); + llvm::Constant *GetOrEmitProtocolRef(const ObjCProtocolDecl *PD) override; /// EmitProtocolExtension - Generate the protocol extension /// structure used to store optional instance and class methods, and @@ -1177,97 +1185,90 @@ private: public: CGObjCMac(CodeGen::CodeGenModule &cgm); - virtual llvm::Function *ModuleInitFunction(); + llvm::Function *ModuleInitFunction() override; - virtual CodeGen::RValue GenerateMessageSend(CodeGen::CodeGenFunction &CGF, - ReturnValueSlot Return, - QualType ResultType, - Selector Sel, - llvm::Value *Receiver, - const CallArgList &CallArgs, - const ObjCInterfaceDecl *Class, - const ObjCMethodDecl *Method); + CodeGen::RValue GenerateMessageSend(CodeGen::CodeGenFunction &CGF, + ReturnValueSlot Return, + QualType ResultType, + Selector Sel, llvm::Value *Receiver, + const CallArgList &CallArgs, + const ObjCInterfaceDecl *Class, + const ObjCMethodDecl *Method) override; - virtual CodeGen::RValue + CodeGen::RValue GenerateMessageSendSuper(CodeGen::CodeGenFunction &CGF, - ReturnValueSlot Return, - QualType ResultType, - Selector Sel, - const ObjCInterfaceDecl *Class, - bool isCategoryImpl, - llvm::Value *Receiver, - bool IsClassMessage, - const CallArgList &CallArgs, - const ObjCMethodDecl *Method); - - virtual llvm::Value *GetClass(CodeGenFunction &CGF, - const ObjCInterfaceDecl *ID); - - virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, - bool lval = false); + ReturnValueSlot Return, QualType ResultType, + Selector Sel, const ObjCInterfaceDecl *Class, + bool isCategoryImpl, llvm::Value *Receiver, + bool IsClassMessage, const CallArgList &CallArgs, + const ObjCMethodDecl *Method) override; + + llvm::Value *GetClass(CodeGenFunction &CGF, + const ObjCInterfaceDecl *ID) override; + + llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, + bool lval = false) override; /// The NeXT/Apple runtimes do not support typed selectors; just emit an /// untyped one. 
- virtual llvm::Value *GetSelector(CodeGenFunction &CGF, - const ObjCMethodDecl *Method); + llvm::Value *GetSelector(CodeGenFunction &CGF, + const ObjCMethodDecl *Method) override; - virtual llvm::Constant *GetEHType(QualType T); + llvm::Constant *GetEHType(QualType T) override; - virtual void GenerateCategory(const ObjCCategoryImplDecl *CMD); + void GenerateCategory(const ObjCCategoryImplDecl *CMD) override; - virtual void GenerateClass(const ObjCImplementationDecl *ClassDecl); + void GenerateClass(const ObjCImplementationDecl *ClassDecl) override; - virtual void RegisterAlias(const ObjCCompatibleAliasDecl *OAD) {} + void RegisterAlias(const ObjCCompatibleAliasDecl *OAD) override {} - virtual llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF, - const ObjCProtocolDecl *PD); + llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF, + const ObjCProtocolDecl *PD) override; - virtual llvm::Constant *GetPropertyGetFunction(); - virtual llvm::Constant *GetPropertySetFunction(); - virtual llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, - bool copy); - virtual llvm::Constant *GetGetStructFunction(); - virtual llvm::Constant *GetSetStructFunction(); - virtual llvm::Constant *GetCppAtomicObjectGetFunction(); - virtual llvm::Constant *GetCppAtomicObjectSetFunction(); - virtual llvm::Constant *EnumerationMutationFunction(); + llvm::Constant *GetPropertyGetFunction() override; + llvm::Constant *GetPropertySetFunction() override; + llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, + bool copy) override; + llvm::Constant *GetGetStructFunction() override; + llvm::Constant *GetSetStructFunction() override; + llvm::Constant *GetCppAtomicObjectGetFunction() override; + llvm::Constant *GetCppAtomicObjectSetFunction() override; + llvm::Constant *EnumerationMutationFunction() override; - virtual void EmitTryStmt(CodeGen::CodeGenFunction &CGF, - const ObjCAtTryStmt &S); - virtual void EmitSynchronizedStmt(CodeGen::CodeGenFunction &CGF, - const ObjCAtSynchronizedStmt &S); + void EmitTryStmt(CodeGen::CodeGenFunction &CGF, + const ObjCAtTryStmt &S) override; + void EmitSynchronizedStmt(CodeGen::CodeGenFunction &CGF, + const ObjCAtSynchronizedStmt &S) override; void EmitTryOrSynchronizedStmt(CodeGen::CodeGenFunction &CGF, const Stmt &S); - virtual void EmitThrowStmt(CodeGen::CodeGenFunction &CGF, - const ObjCAtThrowStmt &S, - bool ClearInsertionPoint=true); - virtual llvm::Value * EmitObjCWeakRead(CodeGen::CodeGenFunction &CGF, - llvm::Value *AddrWeakObj); - virtual void EmitObjCWeakAssign(CodeGen::CodeGenFunction &CGF, - llvm::Value *src, llvm::Value *dst); - virtual void EmitObjCGlobalAssign(CodeGen::CodeGenFunction &CGF, - llvm::Value *src, llvm::Value *dest, - bool threadlocal = false); - virtual void EmitObjCIvarAssign(CodeGen::CodeGenFunction &CGF, - llvm::Value *src, llvm::Value *dest, - llvm::Value *ivarOffset); - virtual void EmitObjCStrongCastAssign(CodeGen::CodeGenFunction &CGF, - llvm::Value *src, llvm::Value *dest); - virtual void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, - llvm::Value *dest, llvm::Value *src, - llvm::Value *size); - - virtual LValue EmitObjCValueForIvar(CodeGen::CodeGenFunction &CGF, - QualType ObjectTy, - llvm::Value *BaseValue, - const ObjCIvarDecl *Ivar, - unsigned CVRQualifiers); - virtual llvm::Value *EmitIvarOffset(CodeGen::CodeGenFunction &CGF, - const ObjCInterfaceDecl *Interface, - const ObjCIvarDecl *Ivar); - + void EmitThrowStmt(CodeGen::CodeGenFunction &CGF, const ObjCAtThrowStmt &S, + bool ClearInsertionPoint=true) override; + 
llvm::Value * EmitObjCWeakRead(CodeGen::CodeGenFunction &CGF, + llvm::Value *AddrWeakObj) override; + void EmitObjCWeakAssign(CodeGen::CodeGenFunction &CGF, + llvm::Value *src, llvm::Value *dst) override; + void EmitObjCGlobalAssign(CodeGen::CodeGenFunction &CGF, + llvm::Value *src, llvm::Value *dest, + bool threadlocal = false) override; + void EmitObjCIvarAssign(CodeGen::CodeGenFunction &CGF, + llvm::Value *src, llvm::Value *dest, + llvm::Value *ivarOffset) override; + void EmitObjCStrongCastAssign(CodeGen::CodeGenFunction &CGF, + llvm::Value *src, llvm::Value *dest) override; + void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, + llvm::Value *dest, llvm::Value *src, + llvm::Value *size) override; + + LValue EmitObjCValueForIvar(CodeGen::CodeGenFunction &CGF, QualType ObjectTy, + llvm::Value *BaseValue, const ObjCIvarDecl *Ivar, + unsigned CVRQualifiers) override; + llvm::Value *EmitIvarOffset(CodeGen::CodeGenFunction &CGF, + const ObjCInterfaceDecl *Interface, + const ObjCIvarDecl *Ivar) override; + /// GetClassGlobal - Return the global variable for the Objective-C /// class of the given name. - virtual llvm::GlobalVariable *GetClassGlobal(const std::string &Name) { + llvm::GlobalVariable *GetClassGlobal(const std::string &Name, + bool Weak = false) override { llvm_unreachable("CGObjCMac::GetClassGlobal"); } }; @@ -1312,11 +1313,12 @@ private: unsigned InstanceStart, unsigned InstanceSize, const ObjCImplementationDecl *ID); - llvm::GlobalVariable * BuildClassMetaData(std::string &ClassName, + llvm::GlobalVariable * BuildClassMetaData(const std::string &ClassName, llvm::Constant *IsAGV, llvm::Constant *SuperClassGV, llvm::Constant *ClassRoGV, - bool HiddenVisibility); + bool HiddenVisibility, + bool Weak); llvm::Constant *GetMethodConstant(const ObjCMethodDecl *MD); @@ -1341,13 +1343,13 @@ private: /// GetOrEmitProtocol - Get the protocol object for the given /// declaration, emitting it if necessary. The return value has type /// ProtocolPtrTy. - virtual llvm::Constant *GetOrEmitProtocol(const ObjCProtocolDecl *PD); + llvm::Constant *GetOrEmitProtocol(const ObjCProtocolDecl *PD) override; /// GetOrEmitProtocolRef - Get a forward reference to the protocol /// object for the given declaration, emitting it if needed. These /// forward references will be filled in with empty bodies if no /// definition is seen. The return value has type ProtocolPtrTy. - virtual llvm::Constant *GetOrEmitProtocolRef(const ObjCProtocolDecl *PD); + llvm::Constant *GetOrEmitProtocolRef(const ObjCProtocolDecl *PD) override; /// EmitProtocolList - Generate the list of referenced /// protocols. The return value has type ProtocolListPtrTy. @@ -1367,17 +1369,19 @@ private: /// GetClassGlobal - Return the global variable for the Objective-C /// class of the given name. - llvm::GlobalVariable *GetClassGlobal(const std::string &Name); - + llvm::GlobalVariable *GetClassGlobal(const std::string &Name, + bool Weak = false) override; + /// EmitClassRef - Return a Value*, of type ObjCTypes.ClassPtrTy, /// for the given class reference. llvm::Value *EmitClassRef(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID); llvm::Value *EmitClassRefFromId(CodeGenFunction &CGF, - IdentifierInfo *II); - - llvm::Value *EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF); + IdentifierInfo *II, bool Weak, + const ObjCInterfaceDecl *ID); + + llvm::Value *EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) override; /// EmitSuperClassRef - Return a Value*, of type ObjCTypes.ClassPtrTy, /// for the given super class reference. 
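The bulk of the mechanical churn in these CGObjCMac.cpp hunks is the C++11 override modernization: the redundant leading 'virtual' on each overriding declaration is dropped in favor of a trailing 'override', so the compiler rejects any signature drift against the base class instead of silently introducing a new, unrelated virtual. A minimal, self-contained sketch of the idiom follows; the Runtime/MacRuntime names and the plain int return type are illustrative stand-ins, not taken from the patch.

    #include <cstdio>

    class Runtime {
    public:
      virtual ~Runtime() = default;
      // Pure virtual hook, analogous to the runtime entry points declared above.
      virtual int ModuleInitFunction() = 0;
    };

    class MacRuntime : public Runtime {
    public:
      // Trailing 'override' replaces the old leading 'virtual': if the base
      // signature ever changes, this declaration becomes a compile error
      // rather than a silently hidden override.
      int ModuleInitFunction() override { return 0; }
    };

    int main() {
      MacRuntime R;
      std::printf("%d\n", R.ModuleInitFunction());
      return 0;
    }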
@@ -1387,7 +1391,7 @@ private: /// EmitMetaClassRef - Return a Value * of the address of _class_t /// meta-data llvm::Value *EmitMetaClassRef(CodeGenFunction &CGF, - const ObjCInterfaceDecl *ID); + const ObjCInterfaceDecl *ID, bool Weak); /// ObjCIvarOffsetVariable - Returns the ivar offset variable for /// the given ivar. @@ -1434,11 +1438,10 @@ private: bool ImplementationIsNonLazy(const ObjCImplDecl *OD) const; bool IsIvarOffsetKnownIdempotent(const CodeGen::CodeGenFunction &CGF, - const ObjCInterfaceDecl *ID, const ObjCIvarDecl *IV) { - // Annotate the load as an invariant load iff the object type is the type, - // or a derived type, of the class containing the ivar within an ObjC - // method. This check is needed because the ivar offset is a lazily + // Annotate the load as an invariant load iff inside an instance method + // and ivar belongs to instance method's class and one of its super class. + // This check is needed because the ivar offset is a lazily // initialised value that may depend on objc_msgSend to perform a fixup on // the first message dispatch. // @@ -1446,128 +1449,122 @@ private: // base of the ivar access is a parameter to an Objective C method. // However, because the parameters are not available in the current // interface, we cannot perform this check. - if (CGF.CurFuncDecl && isa<ObjCMethodDecl>(CGF.CurFuncDecl)) - if (IV->getContainingInterface()->isSuperClassOf(ID)) - return true; + if (const ObjCMethodDecl *MD = + dyn_cast_or_null<ObjCMethodDecl>(CGF.CurFuncDecl)) + if (MD->isInstanceMethod()) + if (const ObjCInterfaceDecl *ID = MD->getClassInterface()) + return IV->getContainingInterface()->isSuperClassOf(ID); return false; } public: CGObjCNonFragileABIMac(CodeGen::CodeGenModule &cgm); // FIXME. All stubs for now! - virtual llvm::Function *ModuleInitFunction(); - - virtual CodeGen::RValue GenerateMessageSend(CodeGen::CodeGenFunction &CGF, - ReturnValueSlot Return, - QualType ResultType, - Selector Sel, - llvm::Value *Receiver, - const CallArgList &CallArgs, - const ObjCInterfaceDecl *Class, - const ObjCMethodDecl *Method); - - virtual CodeGen::RValue + llvm::Function *ModuleInitFunction() override; + + CodeGen::RValue GenerateMessageSend(CodeGen::CodeGenFunction &CGF, + ReturnValueSlot Return, + QualType ResultType, Selector Sel, + llvm::Value *Receiver, + const CallArgList &CallArgs, + const ObjCInterfaceDecl *Class, + const ObjCMethodDecl *Method) override; + + CodeGen::RValue GenerateMessageSendSuper(CodeGen::CodeGenFunction &CGF, - ReturnValueSlot Return, - QualType ResultType, - Selector Sel, - const ObjCInterfaceDecl *Class, - bool isCategoryImpl, - llvm::Value *Receiver, - bool IsClassMessage, - const CallArgList &CallArgs, - const ObjCMethodDecl *Method); - - virtual llvm::Value *GetClass(CodeGenFunction &CGF, - const ObjCInterfaceDecl *ID); - - virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, - bool lvalue = false) + ReturnValueSlot Return, QualType ResultType, + Selector Sel, const ObjCInterfaceDecl *Class, + bool isCategoryImpl, llvm::Value *Receiver, + bool IsClassMessage, const CallArgList &CallArgs, + const ObjCMethodDecl *Method) override; + + llvm::Value *GetClass(CodeGenFunction &CGF, + const ObjCInterfaceDecl *ID) override; + + llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, + bool lvalue = false) override { return EmitSelector(CGF, Sel, lvalue); } /// The NeXT/Apple runtimes do not support typed selectors; just emit an /// untyped one. 
- virtual llvm::Value *GetSelector(CodeGenFunction &CGF, - const ObjCMethodDecl *Method) + llvm::Value *GetSelector(CodeGenFunction &CGF, + const ObjCMethodDecl *Method) override { return EmitSelector(CGF, Method->getSelector()); } - virtual void GenerateCategory(const ObjCCategoryImplDecl *CMD); + void GenerateCategory(const ObjCCategoryImplDecl *CMD) override; - virtual void GenerateClass(const ObjCImplementationDecl *ClassDecl); + void GenerateClass(const ObjCImplementationDecl *ClassDecl) override; - virtual void RegisterAlias(const ObjCCompatibleAliasDecl *OAD) {} + void RegisterAlias(const ObjCCompatibleAliasDecl *OAD) override {} - virtual llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF, - const ObjCProtocolDecl *PD); + llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF, + const ObjCProtocolDecl *PD) override; - virtual llvm::Constant *GetEHType(QualType T); + llvm::Constant *GetEHType(QualType T) override; - virtual llvm::Constant *GetPropertyGetFunction() { + llvm::Constant *GetPropertyGetFunction() override { return ObjCTypes.getGetPropertyFn(); } - virtual llvm::Constant *GetPropertySetFunction() { + llvm::Constant *GetPropertySetFunction() override { return ObjCTypes.getSetPropertyFn(); } - - virtual llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, - bool copy) { + + llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, + bool copy) override { return ObjCTypes.getOptimizedSetPropertyFn(atomic, copy); } - - virtual llvm::Constant *GetSetStructFunction() { + + llvm::Constant *GetSetStructFunction() override { return ObjCTypes.getCopyStructFn(); } - virtual llvm::Constant *GetGetStructFunction() { + llvm::Constant *GetGetStructFunction() override { return ObjCTypes.getCopyStructFn(); } - virtual llvm::Constant *GetCppAtomicObjectSetFunction() { + llvm::Constant *GetCppAtomicObjectSetFunction() override { return ObjCTypes.getCppAtomicObjectFunction(); } - virtual llvm::Constant *GetCppAtomicObjectGetFunction() { + llvm::Constant *GetCppAtomicObjectGetFunction() override { return ObjCTypes.getCppAtomicObjectFunction(); } - - virtual llvm::Constant *EnumerationMutationFunction() { + + llvm::Constant *EnumerationMutationFunction() override { return ObjCTypes.getEnumerationMutationFn(); } - virtual void EmitTryStmt(CodeGen::CodeGenFunction &CGF, - const ObjCAtTryStmt &S); - virtual void EmitSynchronizedStmt(CodeGen::CodeGenFunction &CGF, - const ObjCAtSynchronizedStmt &S); - virtual void EmitThrowStmt(CodeGen::CodeGenFunction &CGF, - const ObjCAtThrowStmt &S, - bool ClearInsertionPoint=true); - virtual llvm::Value * EmitObjCWeakRead(CodeGen::CodeGenFunction &CGF, - llvm::Value *AddrWeakObj); - virtual void EmitObjCWeakAssign(CodeGen::CodeGenFunction &CGF, - llvm::Value *src, llvm::Value *dst); - virtual void EmitObjCGlobalAssign(CodeGen::CodeGenFunction &CGF, - llvm::Value *src, llvm::Value *dest, - bool threadlocal = false); - virtual void EmitObjCIvarAssign(CodeGen::CodeGenFunction &CGF, - llvm::Value *src, llvm::Value *dest, - llvm::Value *ivarOffset); - virtual void EmitObjCStrongCastAssign(CodeGen::CodeGenFunction &CGF, - llvm::Value *src, llvm::Value *dest); - virtual void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, - llvm::Value *dest, llvm::Value *src, - llvm::Value *size); - virtual LValue EmitObjCValueForIvar(CodeGen::CodeGenFunction &CGF, - QualType ObjectTy, - llvm::Value *BaseValue, - const ObjCIvarDecl *Ivar, - unsigned CVRQualifiers); - virtual llvm::Value *EmitIvarOffset(CodeGen::CodeGenFunction &CGF, - const ObjCInterfaceDecl 
*Interface, - const ObjCIvarDecl *Ivar); + void EmitTryStmt(CodeGen::CodeGenFunction &CGF, + const ObjCAtTryStmt &S) override; + void EmitSynchronizedStmt(CodeGen::CodeGenFunction &CGF, + const ObjCAtSynchronizedStmt &S) override; + void EmitThrowStmt(CodeGen::CodeGenFunction &CGF, const ObjCAtThrowStmt &S, + bool ClearInsertionPoint=true) override; + llvm::Value * EmitObjCWeakRead(CodeGen::CodeGenFunction &CGF, + llvm::Value *AddrWeakObj) override; + void EmitObjCWeakAssign(CodeGen::CodeGenFunction &CGF, + llvm::Value *src, llvm::Value *dst) override; + void EmitObjCGlobalAssign(CodeGen::CodeGenFunction &CGF, + llvm::Value *src, llvm::Value *dest, + bool threadlocal = false) override; + void EmitObjCIvarAssign(CodeGen::CodeGenFunction &CGF, + llvm::Value *src, llvm::Value *dest, + llvm::Value *ivarOffset) override; + void EmitObjCStrongCastAssign(CodeGen::CodeGenFunction &CGF, + llvm::Value *src, llvm::Value *dest) override; + void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, + llvm::Value *dest, llvm::Value *src, + llvm::Value *size) override; + LValue EmitObjCValueForIvar(CodeGen::CodeGenFunction &CGF, QualType ObjectTy, + llvm::Value *BaseValue, const ObjCIvarDecl *Ivar, + unsigned CVRQualifiers) override; + llvm::Value *EmitIvarOffset(CodeGen::CodeGenFunction &CGF, + const ObjCInterfaceDecl *Interface, + const ObjCIvarDecl *Ivar) override; }; /// A helper class for performing the null-initialization of a return /// value. struct NullReturnState { llvm::BasicBlock *NullBB; - NullReturnState() : NullBB(0) {} + NullReturnState() : NullBB(nullptr) {} /// Perform a null-check of the given receiver. void init(CodeGenFunction &CGF, llvm::Value *receiver) { @@ -1596,8 +1593,8 @@ struct NullReturnState { // The continuation block. This will be left null if we don't have an // IP, which can happen if the method we're calling is marked noreturn. - llvm::BasicBlock *contBB = 0; - + llvm::BasicBlock *contBB = nullptr; + // Finish the call path. llvm::BasicBlock *callBB = CGF.Builder.GetInsertBlock(); if (callBB) { @@ -1878,14 +1875,14 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, MessageSendInfo MSI = getMessageSendInfo(Method, ResultType, ActualArgs); if (Method) - assert(CGM.getContext().getCanonicalType(Method->getResultType()) == - CGM.getContext().getCanonicalType(ResultType) && + assert(CGM.getContext().getCanonicalType(Method->getReturnType()) == + CGM.getContext().getCanonicalType(ResultType) && "Result type mismatch!"); NullReturnState nullReturn; - llvm::Constant *Fn = NULL; - if (CGM.ReturnTypeUsesSRet(MSI.CallInfo)) { + llvm::Constant *Fn = nullptr; + if (CGM.ReturnSlotInterferesWithArgs(MSI.CallInfo)) { if (!IsSuper) nullReturn.init(CGF, Arg0); Fn = (ObjCABI == 2) ? ObjCTypes.getSendStretFn2(IsSuper) : ObjCTypes.getSendStretFn(IsSuper); @@ -1896,15 +1893,17 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, Fn = (ObjCABI == 2) ? ObjCTypes.getSendFp2RetFn2(IsSuper) : ObjCTypes.getSendFp2retFn(IsSuper); } else { + // arm64 uses objc_msgSend for stret methods and yet null receiver check + // must be made for it. + if (!IsSuper && CGM.ReturnTypeUsesSRet(MSI.CallInfo)) + nullReturn.init(CGF, Arg0); Fn = (ObjCABI == 2) ? 
ObjCTypes.getSendFn2(IsSuper) : ObjCTypes.getSendFn(IsSuper); } bool requiresnullCheck = false; if (CGM.getLangOpts().ObjCAutoRefCount && Method) - for (ObjCMethodDecl::param_const_iterator i = Method->param_begin(), - e = Method->param_end(); i != e; ++i) { - const ParmVarDecl *ParamDecl = (*i); + for (const auto *ParamDecl : Method->params()) { if (ParamDecl->hasAttr<NSConsumedAttr>()) { if (!nullReturn.NullBB) nullReturn.init(CGF, Arg0); @@ -1916,7 +1915,7 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, Fn = llvm::ConstantExpr::getBitCast(Fn, MSI.MessengerType); RValue rvalue = CGF.EmitCall(MSI.CallInfo, Fn, Return, ActualArgs); return nullReturn.complete(CGF, rvalue, ResultType, CallArgs, - requiresnullCheck ? Method : 0); + requiresnullCheck ? Method : nullptr); } static Qualifiers::GC GetGCAttrTypeForType(ASTContext &Ctx, QualType FQT) { @@ -1967,9 +1966,8 @@ llvm::Constant *CGObjCCommonMac::BuildGCBlockLayout(CodeGenModule &CGM, // to be GC'ed. // Walk the captured variables. - for (BlockDecl::capture_const_iterator ci = blockDecl->capture_begin(), - ce = blockDecl->capture_end(); ci != ce; ++ci) { - const VarDecl *variable = ci->getVariable(); + for (const auto &CI : blockDecl->captures()) { + const VarDecl *variable = CI.getVariable(); QualType type = variable->getType(); const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); @@ -1980,7 +1978,7 @@ llvm::Constant *CGObjCCommonMac::BuildGCBlockLayout(CodeGenModule &CGM, uint64_t fieldOffset = layout->getElementOffset(capture.getIndex()); // __block variables are passed by their descriptor address. - if (ci->isByRef()) { + if (CI.isByRef()) { IvarsInfo.push_back(GC_IVAR(fieldOffset, /*size in words*/ 1)); continue; } @@ -2071,8 +2069,8 @@ void CGObjCCommonMac::BuildRCRecordLayout(const llvm::StructLayout *RecLayout, bool ByrefLayout) { bool IsUnion = (RD && RD->isUnion()); CharUnits MaxUnionSize = CharUnits::Zero(); - const FieldDecl *MaxField = 0; - const FieldDecl *LastFieldBitfieldOrUnnamed = 0; + const FieldDecl *MaxField = nullptr; + const FieldDecl *LastFieldBitfieldOrUnnamed = nullptr; CharUnits MaxFieldOffset = CharUnits::Zero(); CharUnits LastBitfieldOrUnnamedOffset = CharUnits::Zero(); @@ -2094,8 +2092,8 @@ void CGObjCCommonMac::BuildRCRecordLayout(const llvm::StructLayout *RecLayout, LastBitfieldOrUnnamedOffset = FieldOffset; continue; } - - LastFieldBitfieldOrUnnamed = 0; + + LastFieldBitfieldOrUnnamed = nullptr; QualType FQT = Field->getType(); if (FQT->isRecordType() || FQT->isUnionType()) { if (FQT->isUnionType()) @@ -2118,9 +2116,6 @@ void CGObjCCommonMac::BuildRCRecordLayout(const llvm::StructLayout *RecLayout, ElCount *= CArray->getSize().getZExtValue(); FQT = CArray->getElementType(); } - - assert(!FQT->isUnionType() && - "layout for array of unions not supported"); if (FQT->isRecordType() && ElCount) { int OldIndex = RunSkipBlockVars.size() - 1; const RecordType *RT = FQT->getAs<RecordType>(); @@ -2196,10 +2191,7 @@ void CGObjCCommonMac::BuildRCBlockVarRecordLayout(const RecordType *RT, bool &HasUnion, bool ByrefLayout) { const RecordDecl *RD = RT->getDecl(); - SmallVector<const FieldDecl*, 16> Fields; - for (RecordDecl::field_iterator i = RD->field_begin(), - e = RD->field_end(); i != e; ++i) - Fields.push_back(*i); + SmallVector<const FieldDecl*, 16> Fields(RD->fields()); llvm::Type *Ty = CGM.getTypes().ConvertType(QualType(RT, 0)); const llvm::StructLayout *RecLayout = CGM.getDataLayout().getStructLayout(cast<llvm::StructType>(Ty)); @@ -2486,9 +2478,8 @@ llvm::Constant 
*CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM, blockInfo.BlockHeaderForcedGapOffset, blockInfo.BlockHeaderForcedGapSize); // Walk the captured variables. - for (BlockDecl::capture_const_iterator ci = blockDecl->capture_begin(), - ce = blockDecl->capture_end(); ci != ce; ++ci) { - const VarDecl *variable = ci->getVariable(); + for (const auto &CI : blockDecl->captures()) { + const VarDecl *variable = CI.getVariable(); QualType type = variable->getType(); const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); @@ -2500,17 +2491,17 @@ llvm::Constant *CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM, CharUnits::fromQuantity(layout->getElementOffset(capture.getIndex())); assert(!type->isArrayType() && "array variable should not be caught"); - if (!ci->isByRef()) + if (!CI.isByRef()) if (const RecordType *record = type->getAs<RecordType>()) { BuildRCBlockVarRecordLayout(record, fieldOffset, hasUnion); continue; } CharUnits fieldSize; - if (ci->isByRef()) + if (CI.isByRef()) fieldSize = CharUnits::fromQuantity(WordSizeInBytes); else fieldSize = CGM.getContext().getTypeSizeInChars(type); - UpdateRunSkipBlockVars(ci->isByRef(), getBlockCaptureLifetime(type, false), + UpdateRunSkipBlockVars(CI.isByRef(), getBlockCaptureLifetime(type, false), fieldOffset, fieldSize); } return getBitmapBlockLayout(false); @@ -2562,8 +2553,16 @@ llvm::Constant *CGObjCCommonMac::GetProtocolRef(const ObjCProtocolDecl *PD) { return GetOrEmitProtocolRef(PD); } +static void assertPrivateName(const llvm::GlobalValue *GV) { + StringRef NameRef = GV->getName(); + (void)NameRef; + assert(NameRef[0] == '\01' && (NameRef[1] == 'L' || NameRef[1] == 'l')); + assert(GV->getVisibility() == llvm::GlobalValue::DefaultVisibility); + assert(GV->hasPrivateLinkage()); +} + /* -// APPLE LOCAL radar 4585769 - Objective-C 1.0 extensions +// Objective-C 1.0 extensions struct _objc_protocol { struct _objc_protocol_extension *isa; char *protocol_name; @@ -2593,9 +2592,7 @@ llvm::Constant *CGObjCMac::GetOrEmitProtocol(const ObjCProtocolDecl *PD) { std::vector<llvm::Constant*> InstanceMethods, ClassMethods; std::vector<llvm::Constant*> OptInstanceMethods, OptClassMethods; std::vector<llvm::Constant*> MethodTypesExt, OptMethodTypesExt; - for (ObjCProtocolDecl::instmeth_iterator - i = PD->instmeth_begin(), e = PD->instmeth_end(); i != e; ++i) { - ObjCMethodDecl *MD = *i; + for (const auto *MD : PD->instance_methods()) { llvm::Constant *C = GetMethodDescriptionConstant(MD); if (!C) return GetOrEmitProtocolRef(PD); @@ -2609,9 +2606,7 @@ llvm::Constant *CGObjCMac::GetOrEmitProtocol(const ObjCProtocolDecl *PD) { } } - for (ObjCProtocolDecl::classmeth_iterator - i = PD->classmeth_begin(), e = PD->classmeth_end(); i != e; ++i) { - ObjCMethodDecl *MD = *i; + for (const auto *MD : PD->class_methods()) { llvm::Constant *C = GetMethodDescriptionConstant(MD); if (!C) return GetOrEmitProtocolRef(PD); @@ -2631,7 +2626,7 @@ llvm::Constant *CGObjCMac::GetOrEmitProtocol(const ObjCProtocolDecl *PD) { llvm::Constant *Values[] = { EmitProtocolExtension(PD, OptInstanceMethods, OptClassMethods, MethodTypesExt), - GetClassName(PD->getIdentifier()), + GetClassName(PD->getObjCRuntimeNameAsString()), EmitProtocolList("\01L_OBJC_PROTOCOL_REFS_" + PD->getName(), PD->protocol_begin(), PD->protocol_end()), @@ -2646,13 +2641,13 @@ llvm::Constant *CGObjCMac::GetOrEmitProtocol(const ObjCProtocolDecl *PD) { Values); if (Entry) { - // Already created, fix the linkage and update the initializer. 
- Entry->setLinkage(llvm::GlobalValue::InternalLinkage); + // Already created, update the initializer. + assert(Entry->hasPrivateLinkage()); Entry->setInitializer(Init); } else { Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ProtocolTy, false, - llvm::GlobalValue::InternalLinkage, + llvm::GlobalValue::PrivateLinkage, Init, "\01L_OBJC_PROTOCOL_" + PD->getName()); Entry->setSection("__OBJC,__protocol,regular,no_dead_strip"); @@ -2661,7 +2656,8 @@ llvm::Constant *CGObjCMac::GetOrEmitProtocol(const ObjCProtocolDecl *PD) { Protocols[PD->getIdentifier()] = Entry; } - CGM.AddUsedGlobal(Entry); + assertPrivateName(Entry); + CGM.addCompilerUsedGlobal(Entry); return Entry; } @@ -2675,13 +2671,14 @@ llvm::Constant *CGObjCMac::GetOrEmitProtocolRef(const ObjCProtocolDecl *PD) { // contents for protocols which were referenced but never defined. Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ProtocolTy, false, - llvm::GlobalValue::ExternalLinkage, - 0, + llvm::GlobalValue::PrivateLinkage, + nullptr, "\01L_OBJC_PROTOCOL_" + PD->getName()); Entry->setSection("__OBJC,__protocol,regular,no_dead_strip"); // FIXME: Is this necessary? Why only for protocol? Entry->setAlignment(4); } + assertPrivateName(Entry); return Entry; } @@ -2711,8 +2708,8 @@ CGObjCMac::EmitProtocolExtension(const ObjCProtocolDecl *PD, EmitMethodDescList("\01L_OBJC_PROTOCOL_CLASS_METHODS_OPT_" + PD->getName(), "__OBJC,__cat_cls_meth,regular,no_dead_strip", OptClassMethods), - EmitPropertyList("\01L_OBJC_$_PROP_PROTO_LIST_" + PD->getName(), 0, PD, - ObjCTypes), + EmitPropertyList("\01L_OBJC_$_PROP_PROTO_LIST_" + PD->getName(), nullptr, + PD, ObjCTypes), EmitProtocolMethodTypes("\01L_OBJC_PROTOCOL_METHOD_TYPES_" + PD->getName(), MethodTypesExt, ObjCTypes) }; @@ -2726,9 +2723,8 @@ CGObjCMac::EmitProtocolExtension(const ObjCProtocolDecl *PD, llvm::ConstantStruct::get(ObjCTypes.ProtocolExtensionTy, Values); // No special section, but goes in llvm.used - return CreateMetadataVar("\01L_OBJC_PROTOCOLEXT_" + PD->getName(), - Init, - 0, 0, true); + return CreateMetadataVar("\01l_OBJC_PROTOCOLEXT_" + PD->getName(), Init, + StringRef(), 0, true); } /* @@ -2775,14 +2771,11 @@ void CGObjCCommonMac:: PushProtocolProperties(llvm::SmallPtrSet<const IdentifierInfo*,16> &PropertySet, SmallVectorImpl<llvm::Constant *> &Properties, const Decl *Container, - const ObjCProtocolDecl *PROTO, + const ObjCProtocolDecl *Proto, const ObjCCommonTypesHelper &ObjCTypes) { - for (ObjCProtocolDecl::protocol_iterator P = PROTO->protocol_begin(), - E = PROTO->protocol_end(); P != E; ++P) - PushProtocolProperties(PropertySet, Properties, Container, (*P), ObjCTypes); - for (ObjCContainerDecl::prop_iterator I = PROTO->prop_begin(), - E = PROTO->prop_end(); I != E; ++I) { - const ObjCPropertyDecl *PD = *I; + for (const auto *P : Proto->protocols()) + PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes); + for (const auto *PD : Proto->properties()) { if (!PropertySet.insert(PD->getIdentifier())) continue; llvm::Constant *Prop[] = { @@ -2811,9 +2804,7 @@ llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name, const ObjCCommonTypesHelper &ObjCTypes) { SmallVector<llvm::Constant *, 16> Properties; llvm::SmallPtrSet<const IdentifierInfo*, 16> PropertySet; - for (ObjCContainerDecl::prop_iterator I = OCD->prop_begin(), - E = OCD->prop_end(); I != E; ++I) { - const ObjCPropertyDecl *PD = *I; + for (const auto *PD : OCD->properties()) { PropertySet.insert(PD->getIdentifier()); llvm::Constant *Prop[] = { 
GetPropertyName(PD->getIdentifier()), @@ -2823,17 +2814,12 @@ llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name, Prop)); } if (const ObjCInterfaceDecl *OID = dyn_cast<ObjCInterfaceDecl>(OCD)) { - for (ObjCInterfaceDecl::all_protocol_iterator - P = OID->all_referenced_protocol_begin(), - E = OID->all_referenced_protocol_end(); P != E; ++P) - PushProtocolProperties(PropertySet, Properties, Container, (*P), - ObjCTypes); + for (const auto *P : OID->all_referenced_protocols()) + PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes); } else if (const ObjCCategoryDecl *CD = dyn_cast<ObjCCategoryDecl>(OCD)) { - for (ObjCCategoryDecl::protocol_iterator P = CD->protocol_begin(), - E = CD->protocol_end(); P != E; ++P) - PushProtocolProperties(PropertySet, Properties, Container, (*P), - ObjCTypes); + for (const auto *P : CD->protocols()) + PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes); } // Return null for empty list. @@ -2871,11 +2857,9 @@ CGObjCCommonMac::EmitProtocolMethodTypes(Twine Name, MethodTypes.size()); llvm::Constant *Init = llvm::ConstantArray::get(AT, MethodTypes); - llvm::GlobalVariable *GV = - CreateMetadataVar(Name, Init, - (ObjCABI == 2) ? "__DATA, __objc_const" : 0, - (ObjCABI == 2) ? 8 : 4, - true); + llvm::GlobalVariable *GV = CreateMetadataVar( + Name, Init, (ObjCABI == 2) ? "__DATA, __objc_const" : StringRef(), + (ObjCABI == 2) ? 8 : 4, true); return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.Int8PtrPtrTy); } @@ -2893,8 +2877,8 @@ CGObjCMac::GetMethodDescriptionConstant(const ObjCMethodDecl *MD) { GetMethodVarType(MD) }; if (!Desc[1]) - return 0; - + return nullptr; + return llvm::ConstantStruct::get(ObjCTypes.MethodDescriptionTy, Desc); } @@ -2945,20 +2929,17 @@ void CGObjCMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { << OCD->getName(); SmallVector<llvm::Constant *, 16> InstanceMethods, ClassMethods; - for (ObjCCategoryImplDecl::instmeth_iterator - i = OCD->instmeth_begin(), e = OCD->instmeth_end(); i != e; ++i) { + for (const auto *I : OCD->instance_methods()) // Instance methods should always be defined. - InstanceMethods.push_back(GetMethodConstant(*i)); - } - for (ObjCCategoryImplDecl::classmeth_iterator - i = OCD->classmeth_begin(), e = OCD->classmeth_end(); i != e; ++i) { + InstanceMethods.push_back(GetMethodConstant(I)); + + for (const auto *I : OCD->class_methods()) // Class methods should always be defined. - ClassMethods.push_back(GetMethodConstant(*i)); - } + ClassMethods.push_back(GetMethodConstant(I)); llvm::Constant *Values[7]; - Values[0] = GetClassName(OCD->getIdentifier()); - Values[1] = GetClassName(Interface->getIdentifier()); + Values[0] = GetClassName(OCD->getName()); + Values[1] = GetClassName(Interface->getObjCRuntimeNameAsString()); LazySymbols.insert(Interface->getIdentifier()); Values[2] = EmitMethodList("\01L_OBJC_CATEGORY_INSTANCE_METHODS_" + ExtName.str(), @@ -3073,21 +3054,15 @@ void CGObjCMac::GenerateClass(const ObjCImplementationDecl *ID) { Flags |= FragileABI_Class_Hidden; SmallVector<llvm::Constant *, 16> InstanceMethods, ClassMethods; - for (ObjCImplementationDecl::instmeth_iterator - i = ID->instmeth_begin(), e = ID->instmeth_end(); i != e; ++i) { + for (const auto *I : ID->instance_methods()) // Instance methods should always be defined. - InstanceMethods.push_back(GetMethodConstant(*i)); - } - for (ObjCImplementationDecl::classmeth_iterator - i = ID->classmeth_begin(), e = ID->classmeth_end(); i != e; ++i) { - // Class methods should always be defined. 
- ClassMethods.push_back(GetMethodConstant(*i)); - } + InstanceMethods.push_back(GetMethodConstant(I)); - for (ObjCImplementationDecl::propimpl_iterator - i = ID->propimpl_begin(), e = ID->propimpl_end(); i != e; ++i) { - ObjCPropertyImplDecl *PID = *i; + for (const auto *I : ID->class_methods()) + // Class methods should always be defined. + ClassMethods.push_back(GetMethodConstant(I)); + for (const auto *PID : ID->property_impls()) { if (PID->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize) { ObjCPropertyDecl *PD = PID->getPropertyDecl(); @@ -3107,12 +3082,12 @@ void CGObjCMac::GenerateClass(const ObjCImplementationDecl *ID) { LazySymbols.insert(Super->getIdentifier()); Values[ 1] = - llvm::ConstantExpr::getBitCast(GetClassName(Super->getIdentifier()), + llvm::ConstantExpr::getBitCast(GetClassName(Super->getObjCRuntimeNameAsString()), ObjCTypes.ClassPtrTy); } else { Values[ 1] = llvm::Constant::getNullValue(ObjCTypes.ClassPtrTy); } - Values[ 2] = GetClassName(ID->getIdentifier()); + Values[ 2] = GetClassName(ID->getObjCRuntimeNameAsString()); // Version is always 0. Values[ 3] = llvm::ConstantInt::get(ObjCTypes.LongTy, 0); Values[ 4] = llvm::ConstantInt::get(ObjCTypes.LongTy, Flags); @@ -3133,19 +3108,19 @@ void CGObjCMac::GenerateClass(const ObjCImplementationDecl *ID) { Name += ClassName; const char *Section = "__OBJC,__class,regular,no_dead_strip"; // Check for a forward reference. - llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name); + llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name, true); if (GV) { assert(GV->getType()->getElementType() == ObjCTypes.ClassTy && "Forward metaclass reference has incorrect type."); - GV->setLinkage(llvm::GlobalValue::InternalLinkage); GV->setInitializer(Init); GV->setSection(Section); GV->setAlignment(4); - CGM.AddUsedGlobal(GV); - } - else + CGM.addCompilerUsedGlobal(GV); + } else GV = CreateMetadataVar(Name, Init, Section, 4, true); + assertPrivateName(GV); DefinedClasses.push_back(GV); + ImplementedClasses.push_back(Interface); // method definition entries must be clear for next implementation. MethodDefinitions.clear(); } @@ -3165,19 +3140,19 @@ llvm::Constant *CGObjCMac::EmitMetaClass(const ObjCImplementationDecl *ID, while (const ObjCInterfaceDecl *Super = Root->getSuperClass()) Root = Super; Values[ 0] = - llvm::ConstantExpr::getBitCast(GetClassName(Root->getIdentifier()), + llvm::ConstantExpr::getBitCast(GetClassName(Root->getObjCRuntimeNameAsString()), ObjCTypes.ClassPtrTy); // The super class for the metaclass is emitted as the name of the // super class. The runtime fixes this up to point to the // *metaclass* for the super class. if (ObjCInterfaceDecl *Super = ID->getClassInterface()->getSuperClass()) { Values[ 1] = - llvm::ConstantExpr::getBitCast(GetClassName(Super->getIdentifier()), + llvm::ConstantExpr::getBitCast(GetClassName(Super->getObjCRuntimeNameAsString()), ObjCTypes.ClassPtrTy); } else { Values[ 1] = llvm::Constant::getNullValue(ObjCTypes.ClassPtrTy); } - Values[ 2] = GetClassName(ID->getIdentifier()); + Values[ 2] = GetClassName(ID->getObjCRuntimeNameAsString()); // Version is always 0. Values[ 3] = llvm::ConstantInt::get(ObjCTypes.LongTy, 0); Values[ 4] = llvm::ConstantInt::get(ObjCTypes.LongTy, Flags); @@ -3201,20 +3176,20 @@ llvm::Constant *CGObjCMac::EmitMetaClass(const ObjCImplementationDecl *ID, Name += ID->getName(); // Check for a forward reference. 
- llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name); + llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name, true); if (GV) { assert(GV->getType()->getElementType() == ObjCTypes.ClassTy && "Forward metaclass reference has incorrect type."); - GV->setLinkage(llvm::GlobalValue::InternalLinkage); GV->setInitializer(Init); } else { GV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassTy, false, - llvm::GlobalValue::InternalLinkage, + llvm::GlobalValue::PrivateLinkage, Init, Name); } + assertPrivateName(GV); GV->setSection("__OBJC,__meta_class,regular,no_dead_strip"); GV->setAlignment(4); - CGM.AddUsedGlobal(GV); + CGM.addCompilerUsedGlobal(GV); return GV; } @@ -3230,35 +3205,31 @@ llvm::Constant *CGObjCMac::EmitMetaClassRef(const ObjCInterfaceDecl *ID) { // Check for an existing forward reference. // Previously, metaclass with internal linkage may have been defined. // pass 'true' as 2nd argument so it is returned. - if (llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name, - true)) { - assert(GV->getType()->getElementType() == ObjCTypes.ClassTy && - "Forward metaclass reference has incorrect type."); - return GV; - } else { - // Generate as an external reference to keep a consistent - // module. This will be patched up when we emit the metaclass. - return new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassTy, false, - llvm::GlobalValue::ExternalLinkage, - 0, - Name); - } + llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name, true); + if (!GV) + GV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassTy, false, + llvm::GlobalValue::PrivateLinkage, nullptr, + Name); + + assert(GV->getType()->getElementType() == ObjCTypes.ClassTy && + "Forward metaclass reference has incorrect type."); + assertPrivateName(GV); + return GV; } llvm::Value *CGObjCMac::EmitSuperClassRef(const ObjCInterfaceDecl *ID) { std::string Name = "\01L_OBJC_CLASS_" + ID->getNameAsString(); - - if (llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name, - true)) { - assert(GV->getType()->getElementType() == ObjCTypes.ClassTy && - "Forward class metadata reference has incorrect type."); - return GV; - } else { - return new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassTy, false, - llvm::GlobalValue::ExternalLinkage, - 0, - Name); - } + llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name, true); + + if (!GV) + GV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassTy, false, + llvm::GlobalValue::PrivateLinkage, nullptr, + Name); + + assert(GV->getType()->getElementType() == ObjCTypes.ClassTy && + "Forward class metadata reference has incorrect type."); + assertPrivateName(GV); + return GV; } /* @@ -3373,7 +3344,7 @@ llvm::Constant *CGObjCMac::EmitIvarList(const ObjCImplementationDecl *ID, llvm::Constant *CGObjCMac::GetMethodConstant(const ObjCMethodDecl *MD) { llvm::Function *Fn = GetMethodDefinition(MD); if (!Fn) - return 0; + return nullptr; llvm::Constant *Method[] = { llvm::ConstantExpr::getBitCast(GetMethodVarName(MD->getSelector()), @@ -3421,29 +3392,29 @@ llvm::Function *CGObjCCommonMac::GenerateMethod(const ObjCMethodDecl *OMD, return Method; } -llvm::GlobalVariable * -CGObjCCommonMac::CreateMetadataVar(Twine Name, - llvm::Constant *Init, - const char *Section, - unsigned Align, - bool AddToUsed) { +llvm::GlobalVariable *CGObjCCommonMac::CreateMetadataVar(Twine Name, + llvm::Constant *Init, + StringRef Section, + unsigned Align, + bool AddToUsed) { llvm::Type *Ty = Init->getType(); llvm::GlobalVariable *GV 
= new llvm::GlobalVariable(CGM.getModule(), Ty, false, - llvm::GlobalValue::InternalLinkage, Init, Name); - if (Section) + llvm::GlobalValue::PrivateLinkage, Init, Name); + assertPrivateName(GV); + if (!Section.empty()) GV->setSection(Section); if (Align) GV->setAlignment(Align); if (AddToUsed) - CGM.AddUsedGlobal(GV); + CGM.addCompilerUsedGlobal(GV); return GV; } llvm::Function *CGObjCMac::ModuleInitFunction() { // Abuse this interface function as a place to finalize. FinishModule(); - return NULL; + return nullptr; } llvm::Constant *CGObjCMac::GetPropertyGetFunction() { @@ -3501,7 +3472,7 @@ namespace { : S(*S), SyncArgSlot(SyncArgSlot), CallTryExitVar(CallTryExitVar), ExceptionData(ExceptionData), ObjCTypes(*ObjCTypes) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { // Check whether we need to call objc_exception_try_exit. // In optimized code, this branch will always be folded. llvm::BasicBlock *FinallyCallExit = @@ -3826,7 +3797,7 @@ void CGObjCMac::EmitTryOrSynchronizedStmt(CodeGen::CodeGenFunction &CGF, // @synchronized. We can't avoid a temp here because we need the // value to be preserved. If the backend ever does liveness // correctly after setjmp, this will be unnecessary. - llvm::Value *SyncArgSlot = 0; + llvm::Value *SyncArgSlot = nullptr; if (!isTry) { llvm::Value *SyncArg = CGF.EmitScalarExpr(cast<ObjCAtSynchronizedStmt>(S).getSynchExpr()); @@ -3861,7 +3832,7 @@ void CGObjCMac::EmitTryOrSynchronizedStmt(CodeGen::CodeGenFunction &CGF, // A slot containing the exception to rethrow. Only needed when we // have both a @catch and a @finally. - llvm::Value *PropagatingExnVar = 0; + llvm::Value *PropagatingExnVar = nullptr; // Push a normal cleanup to leave the try scope. CGF.EHStack.pushCleanup<PerformFragileFinally>(NormalAndEHCleanup, &S, @@ -3927,10 +3898,10 @@ void CGObjCMac::EmitTryOrSynchronizedStmt(CodeGen::CodeGenFunction &CGF, const ObjCAtTryStmt* AtTryStmt = cast<ObjCAtTryStmt>(&S); - bool HasFinally = (AtTryStmt->getFinallyStmt() != 0); + bool HasFinally = (AtTryStmt->getFinallyStmt() != nullptr); - llvm::BasicBlock *CatchBlock = 0; - llvm::BasicBlock *CatchHandler = 0; + llvm::BasicBlock *CatchBlock = nullptr; + llvm::BasicBlock *CatchHandler = nullptr; if (HasFinally) { // Save the currently-propagating exception before // objc_exception_try_enter clears the exception slot. @@ -3968,7 +3939,7 @@ void CGObjCMac::EmitTryOrSynchronizedStmt(CodeGen::CodeGenFunction &CGF, const ObjCAtCatchStmt *CatchStmt = AtTryStmt->getCatchStmt(I); const VarDecl *CatchParam = CatchStmt->getCatchParamDecl(); - const ObjCObjectPointerType *OPT = 0; + const ObjCObjectPointerType *OPT = nullptr; // catch(...) always matches. if (!CatchParam) { @@ -4293,14 +4264,14 @@ llvm::Value *CGObjCMac::EmitIvarOffset(CodeGen::CodeGenFunction &CGF, /// unsigned flags; /// }; enum ImageInfoFlags { - eImageInfo_FixAndContinue = (1 << 0), + eImageInfo_FixAndContinue = (1 << 0), // This flag is no longer set by clang. eImageInfo_GarbageCollected = (1 << 1), eImageInfo_GCOnly = (1 << 2), - eImageInfo_OptimizedByDyld = (1 << 3), // FIXME: When is this set. + eImageInfo_OptimizedByDyld = (1 << 3), // This flag is set by the dyld shared cache. // A flag indicating that the module has no instances of a @synthesize of a // superclass variable. <rdar://problem/6803242> - eImageInfo_CorrectedSynthesize = (1 << 4), + eImageInfo_CorrectedSynthesize = (1 << 4), // This flag is no longer set by clang. 
eImageInfo_ImageIsSimulated = (1 << 5) }; @@ -4373,7 +4344,7 @@ void CGObjCMac::EmitModuleInfo() { llvm::ConstantInt::get(ObjCTypes.LongTy, ModuleVersion), llvm::ConstantInt::get(ObjCTypes.LongTy, Size), // This used to be the filename, now it is unused. <rdr://4327263> - GetClassName(&CGM.getContext().Idents.get("")), + GetClassName(StringRef("")), EmitModuleSymbols() }; CreateMetadataVar("\01L_OBJC_MODULES", @@ -4399,9 +4370,17 @@ llvm::Constant *CGObjCMac::EmitModuleSymbols() { // The runtime expects exactly the list of defined classes followed // by the list of defined categories, in a single array. SmallVector<llvm::Constant*, 8> Symbols(NumClasses + NumCategories); - for (unsigned i=0; i<NumClasses; i++) + for (unsigned i=0; i<NumClasses; i++) { + const ObjCInterfaceDecl *ID = ImplementedClasses[i]; + assert(ID); + if (ObjCImplementationDecl *IMP = ID->getImplementation()) + // We are implementing a weak imported interface. Give it external linkage + if (ID->isWeakImported() && !IMP->isWeakImported()) + DefinedClasses[i]->setLinkage(llvm::GlobalVariable::ExternalLinkage); + Symbols[i] = llvm::ConstantExpr::getBitCast(DefinedClasses[i], ObjCTypes.Int8PtrTy); + } for (unsigned i=0; i<NumCategories; i++) Symbols[NumClasses + i] = llvm::ConstantExpr::getBitCast(DefinedCategories[i], @@ -4429,7 +4408,7 @@ llvm::Value *CGObjCMac::EmitClassRefFromId(CodeGenFunction &CGF, if (!Entry) { llvm::Constant *Casted = - llvm::ConstantExpr::getBitCast(GetClassName(II), + llvm::ConstantExpr::getBitCast(GetClassName(II->getName()), ObjCTypes.ClassPtrTy); Entry = CreateMetadataVar("\01L_OBJC_CLASS_REFERENCES_", Casted, @@ -4470,19 +4449,17 @@ llvm::Value *CGObjCMac::EmitSelector(CodeGenFunction &CGF, Selector Sel, return CGF.Builder.CreateLoad(Entry); } -llvm::Constant *CGObjCCommonMac::GetClassName(IdentifierInfo *Ident) { - llvm::GlobalVariable *&Entry = ClassNames[Ident]; - - if (!Entry) - Entry = CreateMetadataVar("\01L_OBJC_CLASS_NAME_", - llvm::ConstantDataArray::getString(VMContext, - Ident->getNameStart()), - ((ObjCABI == 2) ? - "__TEXT,__objc_classname,cstring_literals" : - "__TEXT,__cstring,cstring_literals"), - 1, true); - - return getConstantGEP(VMContext, Entry, 0, 0); +llvm::Constant *CGObjCCommonMac::GetClassName(StringRef RuntimeName) { + llvm::GlobalVariable *&Entry = ClassNames[RuntimeName]; + if (!Entry) + Entry = CreateMetadataVar("\01L_OBJC_CLASS_NAME_", + llvm::ConstantDataArray::getString(VMContext, + RuntimeName), + ((ObjCABI == 2) ? + "__TEXT,__objc_classname,cstring_literals" : + "__TEXT,__cstring,cstring_literals"), + 1, true); + return getConstantGEP(VMContext, Entry, 0, 0); } llvm::Function *CGObjCCommonMac::GetMethodDefinition(const ObjCMethodDecl *MD) { @@ -4491,7 +4468,7 @@ llvm::Function *CGObjCCommonMac::GetMethodDefinition(const ObjCMethodDecl *MD) { if (I != MethodDefinitions.end()) return I->second; - return NULL; + return nullptr; } /// GetIvarLayoutName - Returns a unique constant for the given @@ -4507,16 +4484,13 @@ void CGObjCCommonMac::BuildAggrIvarRecordLayout(const RecordType *RT, bool &HasUnion) { const RecordDecl *RD = RT->getDecl(); // FIXME - Use iterator. 
- SmallVector<const FieldDecl*, 16> Fields; - for (RecordDecl::field_iterator i = RD->field_begin(), - e = RD->field_end(); i != e; ++i) - Fields.push_back(*i); + SmallVector<const FieldDecl*, 16> Fields(RD->fields()); llvm::Type *Ty = CGM.getTypes().ConvertType(QualType(RT, 0)); const llvm::StructLayout *RecLayout = CGM.getDataLayout().getStructLayout(cast<llvm::StructType>(Ty)); - BuildAggrIvarLayout(0, RecLayout, RD, Fields, BytePos, - ForStrongLayout, HasUnion); + BuildAggrIvarLayout(nullptr, RecLayout, RD, Fields, BytePos, ForStrongLayout, + HasUnion); } void CGObjCCommonMac::BuildAggrIvarLayout(const ObjCImplementationDecl *OI, @@ -4528,9 +4502,9 @@ void CGObjCCommonMac::BuildAggrIvarLayout(const ObjCImplementationDecl *OI, bool IsUnion = (RD && RD->isUnion()); uint64_t MaxUnionIvarSize = 0; uint64_t MaxSkippedUnionIvarSize = 0; - const FieldDecl *MaxField = 0; - const FieldDecl *MaxSkippedField = 0; - const FieldDecl *LastFieldBitfieldOrUnnamed = 0; + const FieldDecl *MaxField = nullptr; + const FieldDecl *MaxSkippedField = nullptr; + const FieldDecl *LastFieldBitfieldOrUnnamed = nullptr; uint64_t MaxFieldOffset = 0; uint64_t MaxSkippedFieldOffset = 0; uint64_t LastBitfieldOrUnnamedOffset = 0; @@ -4565,7 +4539,7 @@ void CGObjCCommonMac::BuildAggrIvarLayout(const ObjCImplementationDecl *OI, continue; } - LastFieldBitfieldOrUnnamed = 0; + LastFieldBitfieldOrUnnamed = nullptr; QualType FQT = Field->getType(); if (FQT->isRecordType() || FQT->isUnionType()) { if (FQT->isUnionType()) @@ -4589,9 +4563,6 @@ void CGObjCCommonMac::BuildAggrIvarLayout(const ObjCImplementationDecl *OI, ElCount *= CArray->getSize().getZExtValue(); FQT = CArray->getElementType(); } - - assert(!FQT->isUnionType() && - "layout for array of unions not supported"); if (FQT->isRecordType() && ElCount) { int OldIndex = IvarsInfo.size() - 1; int OldSkIndex = SkipIvars.size() -1; @@ -4859,7 +4830,8 @@ llvm::Constant *CGObjCCommonMac::BuildIvarLayout( SkipIvars.clear(); IvarsInfo.clear(); - BuildAggrIvarLayout(OMD, 0, 0, RecFields, 0, ForStrongLayout, hasUnion); + BuildAggrIvarLayout(OMD, nullptr, nullptr, RecFields, 0, ForStrongLayout, + hasUnion); if (IvarsInfo.empty()) return llvm::Constant::getNullValue(PtrTy); // Sort on byte position in case we encounterred a union nested in @@ -4875,7 +4847,7 @@ llvm::Constant *CGObjCCommonMac::BuildIvarLayout( if (CGM.getLangOpts().ObjCGCBitmapPrint) { printf("\n%s ivar layout for class '%s': ", ForStrongLayout ? 
"strong" : "weak", - OMD->getClassInterface()->getName().data()); + OMD->getClassInterface()->getName().str().c_str()); const unsigned char *s = (const unsigned char*)BitMap.c_str(); for (unsigned i = 0, e = BitMap.size(); i < e; i++) if (!(s[i] & 0xf0)) @@ -4928,7 +4900,7 @@ llvm::Constant *CGObjCCommonMac::GetMethodVarType(const ObjCMethodDecl *D, bool Extended) { std::string TypeStr; if (CGM.getContext().getObjCEncodingForMethodDecl(D, TypeStr, Extended)) - return 0; + return nullptr; llvm::GlobalVariable *&Entry = MethodVarTypes[TypeStr]; @@ -4948,11 +4920,10 @@ llvm::Constant *CGObjCCommonMac::GetPropertyName(IdentifierInfo *Ident) { llvm::GlobalVariable *&Entry = PropertyNames[Ident]; if (!Entry) - Entry = CreateMetadataVar("\01L_OBJC_PROP_NAME_ATTR_", - llvm::ConstantDataArray::getString(VMContext, - Ident->getNameStart()), - "__TEXT,__cstring,cstring_literals", - 1, true); + Entry = CreateMetadataVar( + "\01L_OBJC_PROP_NAME_ATTR_", + llvm::ConstantDataArray::getString(VMContext, Ident->getName()), + "__TEXT,__cstring,cstring_literals", 1, true); return getConstantGEP(VMContext, Entry, 0, 0); } @@ -4992,14 +4963,14 @@ void CGObjCMac::FinishModule() { llvm::Constant *Values[5]; Values[0] = llvm::Constant::getNullValue(ObjCTypes.ProtocolExtensionPtrTy); - Values[1] = GetClassName(I->first); + Values[1] = GetClassName(I->first->getName()); Values[2] = llvm::Constant::getNullValue(ObjCTypes.ProtocolListPtrTy); Values[3] = Values[4] = llvm::Constant::getNullValue(ObjCTypes.MethodDescriptionListPtrTy); - I->second->setLinkage(llvm::GlobalValue::InternalLinkage); + assertPrivateName(I->second); I->second->setInitializer(llvm::ConstantStruct::get(ObjCTypes.ProtocolTy, Values)); - CGM.AddUsedGlobal(I->second); + CGM.addCompilerUsedGlobal(I->second); } // Add assembler directives to add lazy undefined symbol references @@ -5035,14 +5006,14 @@ void CGObjCMac::FinishModule() { CGObjCNonFragileABIMac::CGObjCNonFragileABIMac(CodeGen::CodeGenModule &cgm) : CGObjCCommonMac(cgm), ObjCTypes(cgm) { - ObjCEmptyCacheVar = ObjCEmptyVtableVar = NULL; + ObjCEmptyCacheVar = ObjCEmptyVtableVar = nullptr; ObjCABI = 2; } /* *** */ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm) - : VMContext(cgm.getLLVMContext()), CGM(cgm), ExternalProtocolPtrTy(0) + : VMContext(cgm.getLLVMContext()), CGM(cgm), ExternalProtocolPtrTy(nullptr) { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); @@ -5054,6 +5025,14 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm) Int8PtrTy = CGM.Int8PtrTy; Int8PtrPtrTy = CGM.Int8PtrPtrTy; + // arm64 targets use "int" ivar offset variables. All others, + // including OS X x86_64 and Windows x86_64, use "long" ivar offsets. 
+ if (CGM.getTarget().getTriple().getArch() == llvm::Triple::arm64 || + CGM.getTarget().getTriple().getArch() == llvm::Triple::aarch64) + IvarOffsetVarTy = IntTy; + else + IvarOffsetVarTy = LongTy; + ObjectPtrTy = Types.ConvertType(Ctx.getObjCIdType()); PtrObjectPtrTy = llvm::PointerType::getUnqual(ObjectPtrTy); SelectorPtrTy = Types.ConvertType(Ctx.getObjCSelType()); @@ -5074,11 +5053,12 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm) Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), &Ctx.Idents.get("_objc_super")); - RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(), 0, - Ctx.getObjCIdType(), 0, 0, false, ICIS_NoInit)); - RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(), 0, - Ctx.getObjCClassType(), 0, 0, false, - ICIS_NoInit)); + RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(), + nullptr, Ctx.getObjCIdType(), nullptr, nullptr, + false, ICIS_NoInit)); + RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(), + nullptr, Ctx.getObjCClassType(), nullptr, + nullptr, false, ICIS_NoInit)); RD->completeDefinition(); SuperCTy = Ctx.getTagDeclType(RD); @@ -5357,16 +5337,15 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul ProtocolListnfABIPtrTy = llvm::PointerType::getUnqual(ProtocolListnfABITy); // struct _ivar_t { - // unsigned long int *offset; // pointer to ivar offset location + // unsigned [long] int *offset; // pointer to ivar offset location // char *name; // char *type; // uint32_t alignment; // uint32_t size; // } - IvarnfABITy = - llvm::StructType::create("struct._ivar_t", - llvm::PointerType::getUnqual(LongTy), - Int8PtrTy, Int8PtrTy, IntTy, IntTy, NULL); + IvarnfABITy = llvm::StructType::create( + "struct._ivar_t", llvm::PointerType::getUnqual(IvarOffsetVarTy), + Int8PtrTy, Int8PtrTy, IntTy, IntTy, NULL); // struct _ivar_list_t { // uint32 entsize; // sizeof(struct _ivar_t) @@ -5456,11 +5435,12 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), &Ctx.Idents.get("_message_ref_t")); - RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(), 0, - Ctx.VoidPtrTy, 0, 0, false, ICIS_NoInit)); - RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(), 0, - Ctx.getObjCSelType(), 0, 0, false, + RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(), + nullptr, Ctx.VoidPtrTy, nullptr, nullptr, false, ICIS_NoInit)); + RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(), + nullptr, Ctx.getObjCSelType(), nullptr, nullptr, + false, ICIS_NoInit)); RD->completeDefinition(); MessageRefCTy = Ctx.getTagDeclType(RD); @@ -5498,7 +5478,7 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul llvm::Function *CGObjCNonFragileABIMac::ModuleInitFunction() { FinishNonFragileABIModule(); - return NULL; + return nullptr; } void CGObjCNonFragileABIMac:: @@ -5521,12 +5501,13 @@ AddModuleClassList(ArrayRef<llvm::GlobalValue*> Container, llvm::GlobalVariable *GV = new llvm::GlobalVariable(CGM.getModule(), Init->getType(), false, - llvm::GlobalValue::InternalLinkage, + llvm::GlobalValue::PrivateLinkage, Init, SymbolName); + assertPrivateName(GV); GV->setAlignment(CGM.getDataLayout().getABITypeAlignment(Init->getType())); GV->setSection(SectionName); - CGM.AddUsedGlobal(GV); + CGM.addCompilerUsedGlobal(GV); } void 
CGObjCNonFragileABIMac::FinishNonFragileABIModule() { @@ -5534,24 +5515,22 @@ void CGObjCNonFragileABIMac::FinishNonFragileABIModule() { // Build list of all implemented class addresses in array // L_OBJC_LABEL_CLASS_$. + + for (unsigned i=0, NumClasses=ImplementedClasses.size(); i<NumClasses; i++) { + const ObjCInterfaceDecl *ID = ImplementedClasses[i]; + assert(ID); + if (ObjCImplementationDecl *IMP = ID->getImplementation()) + // We are implementing a weak imported interface. Give it external linkage + if (ID->isWeakImported() && !IMP->isWeakImported()) { + DefinedClasses[i]->setLinkage(llvm::GlobalVariable::ExternalLinkage); + DefinedMetaClasses[i]->setLinkage(llvm::GlobalVariable::ExternalLinkage); + } + } + AddModuleClassList(DefinedClasses, "\01L_OBJC_LABEL_CLASS_$", "__DATA, __objc_classlist, regular, no_dead_strip"); - - for (unsigned i = 0, e = DefinedClasses.size(); i < e; i++) { - llvm::GlobalValue *IMPLGV = DefinedClasses[i]; - if (IMPLGV->getLinkage() != llvm::GlobalValue::ExternalWeakLinkage) - continue; - IMPLGV->setLinkage(llvm::GlobalValue::ExternalLinkage); - } - - for (unsigned i = 0, e = DefinedMetaClasses.size(); i < e; i++) { - llvm::GlobalValue *IMPLGV = DefinedMetaClasses[i]; - if (IMPLGV->getLinkage() != llvm::GlobalValue::ExternalWeakLinkage) - continue; - IMPLGV->setLinkage(llvm::GlobalValue::ExternalLinkage); - } - + AddModuleClassList(DefinedNonLazyClasses, "\01L_OBJC_LABEL_NONLAZY_CLASS_$", "__DATA, __objc_nlclslist, regular, no_dead_strip"); @@ -5650,7 +5629,7 @@ llvm::GlobalVariable * CGObjCNonFragileABIMac::BuildClassRoTInitializer( unsigned InstanceStart, unsigned InstanceSize, const ObjCImplementationDecl *ID) { - std::string ClassName = ID->getNameAsString(); + std::string ClassName = ID->getObjCRuntimeNameAsString(); llvm::Constant *Values[10]; // 11 for 64bit targets! if (CGM.getLangOpts().ObjCAutoRefCount) @@ -5661,30 +5640,26 @@ llvm::GlobalVariable * CGObjCNonFragileABIMac::BuildClassRoTInitializer( Values[ 2] = llvm::ConstantInt::get(ObjCTypes.IntTy, InstanceSize); // FIXME. For 64bit targets add 0 here. Values[ 3] = (flags & NonFragileABI_Class_Meta) - ? GetIvarLayoutName(0, ObjCTypes) + ? GetIvarLayoutName(nullptr, ObjCTypes) : BuildIvarLayout(ID, true); - Values[ 4] = GetClassName(ID->getIdentifier()); + Values[ 4] = GetClassName(ID->getObjCRuntimeNameAsString()); // const struct _method_list_t * const baseMethods; std::vector<llvm::Constant*> Methods; std::string MethodListName("\01l_OBJC_$_"); if (flags & NonFragileABI_Class_Meta) { - MethodListName += "CLASS_METHODS_" + ID->getNameAsString(); - for (ObjCImplementationDecl::classmeth_iterator - i = ID->classmeth_begin(), e = ID->classmeth_end(); i != e; ++i) { + MethodListName += "CLASS_METHODS_"; + MethodListName += ID->getObjCRuntimeNameAsString(); + for (const auto *I : ID->class_methods()) // Class methods should always be defined. - Methods.push_back(GetMethodConstant(*i)); - } + Methods.push_back(GetMethodConstant(I)); } else { - MethodListName += "INSTANCE_METHODS_" + ID->getNameAsString(); - for (ObjCImplementationDecl::instmeth_iterator - i = ID->instmeth_begin(), e = ID->instmeth_end(); i != e; ++i) { + MethodListName += "INSTANCE_METHODS_"; + MethodListName += ID->getObjCRuntimeNameAsString(); + for (const auto *I : ID->instance_methods()) // Instance methods should always be defined. 
- Methods.push_back(GetMethodConstant(*i)); - } - for (ObjCImplementationDecl::propimpl_iterator - i = ID->propimpl_begin(), e = ID->propimpl_end(); i != e; ++i) { - ObjCPropertyImplDecl *PID = *i; + Methods.push_back(GetMethodConstant(I)); + for (const auto *PID : ID->property_impls()) { if (PID->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize){ ObjCPropertyDecl *PD = PID->getPropertyDecl(); @@ -5703,29 +5678,30 @@ llvm::GlobalVariable * CGObjCNonFragileABIMac::BuildClassRoTInitializer( const ObjCInterfaceDecl *OID = ID->getClassInterface(); assert(OID && "CGObjCNonFragileABIMac::BuildClassRoTInitializer"); Values[ 6] = EmitProtocolList("\01l_OBJC_CLASS_PROTOCOLS_$_" - + OID->getName(), + + OID->getObjCRuntimeNameAsString(), OID->all_referenced_protocol_begin(), OID->all_referenced_protocol_end()); if (flags & NonFragileABI_Class_Meta) { Values[ 7] = llvm::Constant::getNullValue(ObjCTypes.IvarListnfABIPtrTy); - Values[ 8] = GetIvarLayoutName(0, ObjCTypes); + Values[ 8] = GetIvarLayoutName(nullptr, ObjCTypes); Values[ 9] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy); } else { Values[ 7] = EmitIvarList(ID); Values[ 8] = BuildIvarLayout(ID, false); - Values[ 9] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ID->getName(), + Values[ 9] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ID->getObjCRuntimeNameAsString(), ID, ID->getClassInterface(), ObjCTypes); } llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ClassRonfABITy, Values); llvm::GlobalVariable *CLASS_RO_GV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassRonfABITy, false, - llvm::GlobalValue::InternalLinkage, + llvm::GlobalValue::PrivateLinkage, Init, (flags & NonFragileABI_Class_Meta) ? std::string("\01l_OBJC_METACLASS_RO_$_")+ClassName : std::string("\01l_OBJC_CLASS_RO_$_")+ClassName); + assertPrivateName(CLASS_RO_GV); CLASS_RO_GV->setAlignment( CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ClassRonfABITy)); CLASS_RO_GV->setSection("__DATA, __objc_const"); @@ -5743,12 +5719,9 @@ llvm::GlobalVariable * CGObjCNonFragileABIMac::BuildClassRoTInitializer( /// struct class_ro_t *ro; /// } /// -llvm::GlobalVariable * CGObjCNonFragileABIMac::BuildClassMetaData( - std::string &ClassName, - llvm::Constant *IsAGV, - llvm::Constant *SuperClassGV, - llvm::Constant *ClassRoGV, - bool HiddenVisibility) { +llvm::GlobalVariable *CGObjCNonFragileABIMac::BuildClassMetaData( + const std::string &ClassName, llvm::Constant *IsAGV, llvm::Constant *SuperClassGV, + llvm::Constant *ClassRoGV, bool HiddenVisibility, bool Weak) { llvm::Constant *Values[] = { IsAGV, SuperClassGV, @@ -5763,7 +5736,7 @@ llvm::GlobalVariable * CGObjCNonFragileABIMac::BuildClassMetaData( llvm::PointerType::getUnqual(ObjCTypes.ImpnfABITy)); llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ClassnfABITy, Values); - llvm::GlobalVariable *GV = GetClassGlobal(ClassName); + llvm::GlobalVariable *GV = GetClassGlobal(ClassName, Weak); GV->setInitializer(Init); GV->setSection("__DATA, __objc_data"); GV->setAlignment( @@ -5775,7 +5748,7 @@ llvm::GlobalVariable * CGObjCNonFragileABIMac::BuildClassMetaData( bool CGObjCNonFragileABIMac::ImplementationIsNonLazy(const ObjCImplDecl *OD) const { - return OD->getClassMethod(GetNullarySelector("load")) != 0; + return OD->getClassMethod(GetNullarySelector("load")) != nullptr; } void CGObjCNonFragileABIMac::GetClassSizeInfo(const ObjCImplementationDecl *OID, @@ -5795,29 +5768,29 @@ void CGObjCNonFragileABIMac::GetClassSizeInfo(const ObjCImplementationDecl *OID, } void 
CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { - std::string ClassName = ID->getNameAsString(); + std::string ClassName = ID->getObjCRuntimeNameAsString(); if (!ObjCEmptyCacheVar) { ObjCEmptyCacheVar = new llvm::GlobalVariable( CGM.getModule(), ObjCTypes.CacheTy, false, llvm::GlobalValue::ExternalLinkage, - 0, + nullptr, "_objc_empty_cache"); - + // Make this entry NULL for any iOS device target, any iOS simulator target, // OS X with deployment target 10.9 or later. const llvm::Triple &Triple = CGM.getTarget().getTriple(); if (Triple.isiOS() || (Triple.isMacOSX() && !Triple.isMacOSXVersionLT(10, 9))) // This entry will be null. - ObjCEmptyVtableVar = 0; + ObjCEmptyVtableVar = nullptr; else ObjCEmptyVtableVar = new llvm::GlobalVariable( CGM.getModule(), ObjCTypes.ImpnfABITy, false, llvm::GlobalValue::ExternalLinkage, - 0, + nullptr, "_objc_empty_vtable"); } assert(ID->getClassInterface() && @@ -5827,8 +5800,9 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { CGM.getDataLayout().getTypeAllocSize(ObjCTypes.ClassnfABITy); uint32_t InstanceSize = InstanceStart; uint32_t flags = NonFragileABI_Class_Meta; - std::string ObjCMetaClassName(getMetaclassSymbolPrefix()); - std::string ObjCClassName(getClassSymbolPrefix()); + llvm::SmallString<64> ObjCMetaClassName(getMetaclassSymbolPrefix()); + llvm::SmallString<64> ObjCClassName(getClassSymbolPrefix()); + llvm::SmallString<64> TClassName; llvm::GlobalVariable *SuperClassGV, *IsAGV; @@ -5849,31 +5823,39 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { if (!ID->getClassInterface()->getSuperClass()) { // class is root flags |= NonFragileABI_Class_Root; - SuperClassGV = GetClassGlobal(ObjCClassName + ClassName); - IsAGV = GetClassGlobal(ObjCMetaClassName + ClassName); + TClassName = ObjCClassName; + TClassName += ClassName; + SuperClassGV = GetClassGlobal(TClassName.str(), + ID->getClassInterface()->isWeakImported()); + TClassName = ObjCMetaClassName; + TClassName += ClassName; + IsAGV = GetClassGlobal(TClassName.str(), + ID->getClassInterface()->isWeakImported()); } else { // Has a root. Current class is not a root. const ObjCInterfaceDecl *Root = ID->getClassInterface(); while (const ObjCInterfaceDecl *Super = Root->getSuperClass()) Root = Super; - IsAGV = GetClassGlobal(ObjCMetaClassName + Root->getNameAsString()); - if (Root->isWeakImported()) - IsAGV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); + TClassName = ObjCMetaClassName ; + TClassName += Root->getObjCRuntimeNameAsString(); + IsAGV = GetClassGlobal(TClassName.str(), + Root->isWeakImported()); + // work on super class metadata symbol. 
- std::string SuperClassName = - ObjCMetaClassName + - ID->getClassInterface()->getSuperClass()->getNameAsString(); - SuperClassGV = GetClassGlobal(SuperClassName); - if (ID->getClassInterface()->getSuperClass()->isWeakImported()) - SuperClassGV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); + TClassName = ObjCMetaClassName; + TClassName += ID->getClassInterface()->getSuperClass()->getObjCRuntimeNameAsString(); + SuperClassGV = GetClassGlobal( + TClassName.str(), + ID->getClassInterface()->getSuperClass()->isWeakImported()); } llvm::GlobalVariable *CLASS_RO_GV = BuildClassRoTInitializer(flags, InstanceStart, InstanceSize,ID); - std::string TClassName = ObjCMetaClassName + ClassName; - llvm::GlobalVariable *MetaTClass = - BuildClassMetaData(TClassName, IsAGV, SuperClassGV, CLASS_RO_GV, - classIsHidden); + TClassName = ObjCMetaClassName; + TClassName += ClassName; + llvm::GlobalVariable *MetaTClass = BuildClassMetaData( + TClassName.str(), IsAGV, SuperClassGV, CLASS_RO_GV, classIsHidden, + ID->getClassInterface()->isWeakImported()); DefinedMetaClasses.push_back(MetaTClass); // Metadata for the class @@ -5899,14 +5881,14 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { if (!ID->getClassInterface()->getSuperClass()) { flags |= NonFragileABI_Class_Root; - SuperClassGV = 0; + SuperClassGV = nullptr; } else { // Has a root. Current class is not a root. - std::string RootClassName = - ID->getClassInterface()->getSuperClass()->getNameAsString(); - SuperClassGV = GetClassGlobal(ObjCClassName + RootClassName); - if (ID->getClassInterface()->getSuperClass()->isWeakImported()) - SuperClassGV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); + TClassName = ObjCClassName; + TClassName += ID->getClassInterface()->getSuperClass()->getObjCRuntimeNameAsString(); + SuperClassGV = GetClassGlobal( + TClassName.str(), + ID->getClassInterface()->getSuperClass()->isWeakImported()); } GetClassSizeInfo(ID, InstanceStart, InstanceSize); CLASS_RO_GV = BuildClassRoTInitializer(flags, @@ -5914,11 +5896,14 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { InstanceSize, ID); - TClassName = ObjCClassName + ClassName; + TClassName = ObjCClassName; + TClassName += ClassName; llvm::GlobalVariable *ClassMD = - BuildClassMetaData(TClassName, MetaTClass, SuperClassGV, CLASS_RO_GV, - classIsHidden); + BuildClassMetaData(TClassName.str(), MetaTClass, SuperClassGV, CLASS_RO_GV, + classIsHidden, + ID->getClassInterface()->isWeakImported()); DefinedClasses.push_back(ClassMD); + ImplementedClasses.push_back(ID->getClassInterface()); // Determine if this class is also "non-lazy". 
if (ImplementationIsNonLazy(ID)) @@ -5950,7 +5935,7 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF, ObjCTypes.getExternalProtocolPtrTy()); std::string ProtocolName("\01l_OBJC_PROTOCOL_REFERENCE_$_"); - ProtocolName += PD->getName(); + ProtocolName += PD->getObjCRuntimeNameAsString(); llvm::GlobalVariable *PTGV = CGM.getModule().getGlobalVariable(ProtocolName); if (PTGV) @@ -5963,7 +5948,7 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF, ProtocolName); PTGV->setSection("__DATA, __objc_protorefs, coalesced, no_dead_strip"); PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility); - CGM.AddUsedGlobal(PTGV); + CGM.addCompilerUsedGlobal(PTGV); return CGF.Builder.CreateLoad(PTGV); } @@ -5980,58 +5965,63 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF, void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { const ObjCInterfaceDecl *Interface = OCD->getClassInterface(); const char *Prefix = "\01l_OBJC_$_CATEGORY_"; - std::string ExtCatName(Prefix + Interface->getNameAsString()+ - "_$_" + OCD->getNameAsString()); - std::string ExtClassName(getClassSymbolPrefix() + - Interface->getNameAsString()); + + llvm::SmallString<64> ExtCatName(Prefix); + ExtCatName += Interface->getObjCRuntimeNameAsString(); + ExtCatName += "_$_"; + ExtCatName += OCD->getNameAsString(); + + llvm::SmallString<64> ExtClassName(getClassSymbolPrefix()); + ExtClassName += Interface->getObjCRuntimeNameAsString(); llvm::Constant *Values[6]; - Values[0] = GetClassName(OCD->getIdentifier()); + Values[0] = GetClassName(OCD->getIdentifier()->getName()); // meta-class entry symbol - llvm::GlobalVariable *ClassGV = GetClassGlobal(ExtClassName); - if (Interface->isWeakImported()) - ClassGV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); - + llvm::GlobalVariable *ClassGV = + GetClassGlobal(ExtClassName.str(), Interface->isWeakImported()); + Values[1] = ClassGV; std::vector<llvm::Constant*> Methods; - std::string MethodListName(Prefix); - MethodListName += "INSTANCE_METHODS_" + Interface->getNameAsString() + - "_$_" + OCD->getNameAsString(); + llvm::SmallString<64> MethodListName(Prefix); + + MethodListName += "INSTANCE_METHODS_"; + MethodListName += Interface->getObjCRuntimeNameAsString(); + MethodListName += "_$_"; + MethodListName += OCD->getName(); - for (ObjCCategoryImplDecl::instmeth_iterator - i = OCD->instmeth_begin(), e = OCD->instmeth_end(); i != e; ++i) { + for (const auto *I : OCD->instance_methods()) // Instance methods should always be defined. - Methods.push_back(GetMethodConstant(*i)); - } + Methods.push_back(GetMethodConstant(I)); - Values[2] = EmitMethodList(MethodListName, + Values[2] = EmitMethodList(MethodListName.str(), "__DATA, __objc_const", Methods); MethodListName = Prefix; - MethodListName += "CLASS_METHODS_" + Interface->getNameAsString() + "_$_" + - OCD->getNameAsString(); + MethodListName += "CLASS_METHODS_"; + MethodListName += Interface->getObjCRuntimeNameAsString(); + MethodListName += "_$_"; + MethodListName += OCD->getNameAsString(); + Methods.clear(); - for (ObjCCategoryImplDecl::classmeth_iterator - i = OCD->classmeth_begin(), e = OCD->classmeth_end(); i != e; ++i) { + for (const auto *I : OCD->class_methods()) // Class methods should always be defined. 
- Methods.push_back(GetMethodConstant(*i)); - } + Methods.push_back(GetMethodConstant(I)); - Values[3] = EmitMethodList(MethodListName, + Values[3] = EmitMethodList(MethodListName.str(), "__DATA, __objc_const", Methods); const ObjCCategoryDecl *Category = Interface->FindCategoryDeclaration(OCD->getIdentifier()); if (Category) { SmallString<256> ExtName; - llvm::raw_svector_ostream(ExtName) << Interface->getName() << "_$_" + llvm::raw_svector_ostream(ExtName) << Interface->getObjCRuntimeNameAsString() << "_$_" << OCD->getName(); Values[4] = EmitProtocolList("\01l_OBJC_CATEGORY_PROTOCOLS_$_" - + Interface->getName() + "_$_" - + Category->getName(), - Category->protocol_begin(), - Category->protocol_end()); + + Interface->getObjCRuntimeNameAsString() + "_$_" + + Category->getName(), + Category->protocol_begin(), + Category->protocol_end()); Values[5] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ExtName.str(), OCD, Category, ObjCTypes); } else { @@ -6045,13 +6035,14 @@ void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { llvm::GlobalVariable *GCATV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.CategorynfABITy, false, - llvm::GlobalValue::InternalLinkage, + llvm::GlobalValue::PrivateLinkage, Init, - ExtCatName); + ExtCatName.str()); + assertPrivateName(GCATV); GCATV->setAlignment( CGM.getDataLayout().getABITypeAlignment(ObjCTypes.CategorynfABITy)); GCATV->setSection("__DATA, __objc_const"); - CGM.AddUsedGlobal(GCATV); + CGM.addCompilerUsedGlobal(GCATV); DefinedCategories.push_back(GCATV); // Determine if this category is also "non-lazy". @@ -6068,7 +6059,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetMethodConstant( const ObjCMethodDecl *MD) { llvm::Function *Fn = GetMethodDefinition(MD); if (!Fn) - return 0; + return nullptr; llvm::Constant *Method[] = { llvm::ConstantExpr::getBitCast(GetMethodVarName(MD->getSelector()), @@ -6107,10 +6098,11 @@ CGObjCNonFragileABIMac::EmitMethodList(Twine Name, llvm::GlobalVariable *GV = new llvm::GlobalVariable(CGM.getModule(), Init->getType(), false, - llvm::GlobalValue::InternalLinkage, Init, Name); + llvm::GlobalValue::PrivateLinkage, Init, Name); + assertPrivateName(GV); GV->setAlignment(CGM.getDataLayout().getABITypeAlignment(Init->getType())); GV->setSection(Section); - CGM.AddUsedGlobal(GV); + CGM.addCompilerUsedGlobal(GV); return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.MethodListnfABIPtrTy); } @@ -6119,18 +6111,18 @@ CGObjCNonFragileABIMac::EmitMethodList(Twine Name, llvm::GlobalVariable * CGObjCNonFragileABIMac::ObjCIvarOffsetVariable(const ObjCInterfaceDecl *ID, const ObjCIvarDecl *Ivar) { + const ObjCInterfaceDecl *Container = Ivar->getContainingInterface(); - std::string Name = "OBJC_IVAR_$_" + Container->getNameAsString() + - '.' 
+ Ivar->getNameAsString(); + llvm::SmallString<64> Name("OBJC_IVAR_$_"); + Name += Container->getObjCRuntimeNameAsString(); + Name += "."; + Name += Ivar->getName(); llvm::GlobalVariable *IvarOffsetGV = CGM.getModule().getGlobalVariable(Name); if (!IvarOffsetGV) - IvarOffsetGV = - new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.LongTy, - false, - llvm::GlobalValue::ExternalLinkage, - 0, - Name); + IvarOffsetGV = new llvm::GlobalVariable( + CGM.getModule(), ObjCTypes.IvarOffsetVarTy, false, + llvm::GlobalValue::ExternalLinkage, nullptr, Name.str()); return IvarOffsetGV; } @@ -6139,10 +6131,10 @@ CGObjCNonFragileABIMac::EmitIvarOffsetVar(const ObjCInterfaceDecl *ID, const ObjCIvarDecl *Ivar, unsigned long int Offset) { llvm::GlobalVariable *IvarOffsetGV = ObjCIvarOffsetVariable(ID, Ivar); - IvarOffsetGV->setInitializer(llvm::ConstantInt::get(ObjCTypes.LongTy, - Offset)); + IvarOffsetGV->setInitializer( + llvm::ConstantInt::get(ObjCTypes.IvarOffsetVarTy, Offset)); IvarOffsetGV->setAlignment( - CGM.getDataLayout().getABITypeAlignment(ObjCTypes.LongTy)); + CGM.getDataLayout().getABITypeAlignment(ObjCTypes.IvarOffsetVarTy)); // FIXME: This matches gcc, but shouldn't the visibility be set on the use as // well (i.e., in ObjCIvarOffsetVariable). @@ -6160,7 +6152,7 @@ CGObjCNonFragileABIMac::EmitIvarOffsetVar(const ObjCInterfaceDecl *ID, /// implementation. The return value has type /// IvarListnfABIPtrTy. /// struct _ivar_t { -/// unsigned long int *offset; // pointer to ivar offset location +/// unsigned [long] int *offset; // pointer to ivar offset location /// char *name; /// char *type; /// uint32_t alignment; @@ -6223,14 +6215,15 @@ llvm::Constant *CGObjCNonFragileABIMac::EmitIvarList( const char *Prefix = "\01l_OBJC_$_INSTANCE_VARIABLES_"; llvm::GlobalVariable *GV = new llvm::GlobalVariable(CGM.getModule(), Init->getType(), false, - llvm::GlobalValue::InternalLinkage, + llvm::GlobalValue::PrivateLinkage, Init, - Prefix + OID->getName()); + Prefix + OID->getObjCRuntimeNameAsString()); + assertPrivateName(GV); GV->setAlignment( CGM.getDataLayout().getABITypeAlignment(Init->getType())); GV->setSection("__DATA, __objc_const"); - CGM.AddUsedGlobal(GV); + CGM.addCompilerUsedGlobal(GV); return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.IvarListnfABIPtrTy); } @@ -6243,10 +6236,10 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocolRef( // reference or not. At module finalization we add the empty // contents for protocols which were referenced but never defined. 
Entry = - new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ProtocolnfABITy, false, - llvm::GlobalValue::ExternalLinkage, - 0, - "\01l_OBJC_PROTOCOL_$_" + PD->getName()); + new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ProtocolnfABITy, + false, llvm::GlobalValue::WeakAnyLinkage, + nullptr, + "\01l_OBJC_PROTOCOL_$_" + PD->getObjCRuntimeNameAsString()); Entry->setSection("__DATA,__datacoal_nt,coalesced"); } @@ -6287,9 +6280,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( std::vector<llvm::Constant*> InstanceMethods, ClassMethods; std::vector<llvm::Constant*> OptInstanceMethods, OptClassMethods; std::vector<llvm::Constant*> MethodTypesExt, OptMethodTypesExt; - for (ObjCProtocolDecl::instmeth_iterator - i = PD->instmeth_begin(), e = PD->instmeth_end(); i != e; ++i) { - ObjCMethodDecl *MD = *i; + for (const auto *MD : PD->instance_methods()) { llvm::Constant *C = GetMethodDescriptionConstant(MD); if (!C) return GetOrEmitProtocolRef(PD); @@ -6303,9 +6294,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( } } - for (ObjCProtocolDecl::classmeth_iterator - i = PD->classmeth_begin(), e = PD->classmeth_end(); i != e; ++i) { - ObjCMethodDecl *MD = *i; + for (const auto *MD : PD->class_methods()) { llvm::Constant *C = GetMethodDescriptionConstant(MD); if (!C) return GetOrEmitProtocolRef(PD); @@ -6325,48 +6314,48 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( llvm::Constant *Values[11]; // isa is NULL Values[0] = llvm::Constant::getNullValue(ObjCTypes.ObjectPtrTy); - Values[1] = GetClassName(PD->getIdentifier()); - Values[2] = EmitProtocolList("\01l_OBJC_$_PROTOCOL_REFS_" + PD->getName(), + Values[1] = GetClassName(PD->getObjCRuntimeNameAsString()); + Values[2] = EmitProtocolList("\01l_OBJC_$_PROTOCOL_REFS_" + PD->getObjCRuntimeNameAsString(), PD->protocol_begin(), PD->protocol_end()); Values[3] = EmitMethodList("\01l_OBJC_$_PROTOCOL_INSTANCE_METHODS_" - + PD->getName(), + + PD->getObjCRuntimeNameAsString(), "__DATA, __objc_const", InstanceMethods); Values[4] = EmitMethodList("\01l_OBJC_$_PROTOCOL_CLASS_METHODS_" - + PD->getName(), + + PD->getObjCRuntimeNameAsString(), "__DATA, __objc_const", ClassMethods); Values[5] = EmitMethodList("\01l_OBJC_$_PROTOCOL_INSTANCE_METHODS_OPT_" - + PD->getName(), + + PD->getObjCRuntimeNameAsString(), "__DATA, __objc_const", OptInstanceMethods); Values[6] = EmitMethodList("\01l_OBJC_$_PROTOCOL_CLASS_METHODS_OPT_" - + PD->getName(), + + PD->getObjCRuntimeNameAsString(), "__DATA, __objc_const", OptClassMethods); - Values[7] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + PD->getName(), - 0, PD, ObjCTypes); + Values[7] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + PD->getObjCRuntimeNameAsString(), + nullptr, PD, ObjCTypes); uint32_t Size = CGM.getDataLayout().getTypeAllocSize(ObjCTypes.ProtocolnfABITy); Values[8] = llvm::ConstantInt::get(ObjCTypes.IntTy, Size); Values[9] = llvm::Constant::getNullValue(ObjCTypes.IntTy); Values[10] = EmitProtocolMethodTypes("\01l_OBJC_$_PROTOCOL_METHOD_TYPES_" - + PD->getName(), + + PD->getObjCRuntimeNameAsString(), MethodTypesExt, ObjCTypes); llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ProtocolnfABITy, Values); if (Entry) { - // Already created, fix the linkage and update the initializer. - Entry->setLinkage(llvm::GlobalValue::WeakAnyLinkage); + // Already created, update the initializer. 
+ assert(Entry->hasWeakAnyLinkage()); Entry->setInitializer(Init); } else { Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ProtocolnfABITy, false, llvm::GlobalValue::WeakAnyLinkage, Init, - "\01l_OBJC_PROTOCOL_$_" + PD->getName()); + "\01l_OBJC_PROTOCOL_$_" + PD->getObjCRuntimeNameAsString()); Entry->setAlignment( CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ProtocolnfABITy)); Entry->setSection("__DATA,__datacoal_nt,coalesced"); @@ -6374,19 +6363,19 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( Protocols[PD->getIdentifier()] = Entry; } Entry->setVisibility(llvm::GlobalValue::HiddenVisibility); - CGM.AddUsedGlobal(Entry); + CGM.addCompilerUsedGlobal(Entry); // Use this protocol meta-data to build protocol list table in section // __DATA, __objc_protolist llvm::GlobalVariable *PTGV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ProtocolnfABIPtrTy, false, llvm::GlobalValue::WeakAnyLinkage, Entry, - "\01l_OBJC_LABEL_PROTOCOL_$_" + PD->getName()); + "\01l_OBJC_LABEL_PROTOCOL_$_" + PD->getObjCRuntimeNameAsString()); PTGV->setAlignment( CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ProtocolnfABIPtrTy)); PTGV->setSection("__DATA, __objc_protolist, coalesced, no_dead_strip"); PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility); - CGM.AddUsedGlobal(PTGV); + CGM.addCompilerUsedGlobal(PTGV); return Entry; } @@ -6433,12 +6422,13 @@ CGObjCNonFragileABIMac::EmitProtocolList(Twine Name, llvm::Constant *Init = llvm::ConstantStruct::getAnon(Values); GV = new llvm::GlobalVariable(CGM.getModule(), Init->getType(), false, - llvm::GlobalValue::InternalLinkage, + llvm::GlobalValue::PrivateLinkage, Init, Name); + assertPrivateName(GV); GV->setSection("__DATA, __objc_const"); GV->setAlignment( CGM.getDataLayout().getABITypeAlignment(Init->getType())); - CGM.AddUsedGlobal(GV); + CGM.addCompilerUsedGlobal(GV); return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.ProtocolListnfABIPtrTy); } @@ -6458,8 +6448,8 @@ CGObjCNonFragileABIMac::GetMethodDescriptionConstant(const ObjCMethodDecl *MD) { ObjCTypes.SelectorPtrTy); Desc[1] = GetMethodVarType(MD); if (!Desc[1]) - return 0; - + return nullptr; + // Protocol methods have no implementation. So, this entry is always NULL. 
Desc[2] = llvm::Constant::getNullValue(ObjCTypes.Int8PtrTy); return llvm::ConstantStruct::get(ObjCTypes.MethodTy, Desc); @@ -6479,12 +6469,6 @@ LValue CGObjCNonFragileABIMac::EmitObjCValueForIvar( unsigned CVRQualifiers) { ObjCInterfaceDecl *ID = ObjectTy->getAs<ObjCObjectType>()->getInterface(); llvm::Value *Offset = EmitIvarOffset(CGF, ID, Ivar); - - if (IsIvarOffsetKnownIdempotent(CGF, ID, Ivar)) - if (llvm::LoadInst *LI = cast<llvm::LoadInst>(Offset)) - LI->setMetadata(CGM.getModule().getMDKindID("invariant.load"), - llvm::MDNode::get(VMContext, ArrayRef<llvm::Value*>())); - return EmitValueForIvarAtOffset(CGF, ID, BaseValue, Ivar, CVRQualifiers, Offset); } @@ -6493,7 +6477,20 @@ llvm::Value *CGObjCNonFragileABIMac::EmitIvarOffset( CodeGen::CodeGenFunction &CGF, const ObjCInterfaceDecl *Interface, const ObjCIvarDecl *Ivar) { - return CGF.Builder.CreateLoad(ObjCIvarOffsetVariable(Interface, Ivar),"ivar"); + llvm::Value *IvarOffsetValue = ObjCIvarOffsetVariable(Interface, Ivar); + IvarOffsetValue = CGF.Builder.CreateLoad(IvarOffsetValue, "ivar"); + if (IsIvarOffsetKnownIdempotent(CGF, Ivar)) + cast<llvm::LoadInst>(IvarOffsetValue) + ->setMetadata(CGM.getModule().getMDKindID("invariant.load"), + llvm::MDNode::get(VMContext, ArrayRef<llvm::Value *>())); + + // This could be 32bit int or 64bit integer depending on the architecture. + // Cast it to 64bit integer value, if it is a 32bit integer ivar offset value + // as this is what caller always expectes. + if (ObjCTypes.IvarOffsetVarTy == ObjCTypes.IntTy) + IvarOffsetValue = CGF.Builder.CreateIntCast( + IvarOffsetValue, ObjCTypes.LongTy, true, "ivar.conv"); + return IvarOffsetValue; } static void appendSelectorForMessageRefTable(std::string &buffer, @@ -6539,7 +6536,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF, // Second argument: a pointer to the message ref structure. Leave // the actual argument value blank for now. - args.add(RValue::get(0), ObjCTypes.MessageRefCPtrTy); + args.add(RValue::get(nullptr), ObjCTypes.MessageRefCPtrTy); args.insert(args.end(), formalArgs.begin(), formalArgs.end()); @@ -6554,9 +6551,9 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF, // The runtime currently never uses vtable dispatch for anything // except normal, non-super message-sends. // FIXME: don't use this for that. - llvm::Constant *fn = 0; + llvm::Constant *fn = nullptr; std::string messageRefName("\01l_"); - if (CGM.ReturnTypeUsesSRet(MSI.CallInfo)) { + if (CGM.ReturnSlotInterferesWithArgs(MSI.CallInfo)) { if (isSuper) { fn = ObjCTypes.getMessageSendSuper2StretFixupFn(); messageRefName += "objc_msgSendSuper2_stret_fixup"; @@ -6603,9 +6600,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF, bool requiresnullCheck = false; if (CGM.getLangOpts().ObjCAutoRefCount && method) - for (ObjCMethodDecl::param_const_iterator i = method->param_begin(), - e = method->param_end(); i != e; ++i) { - const ParmVarDecl *ParamDecl = (*i); + for (const auto *ParamDecl : method->params()) { if (ParamDecl->hasAttr<NSConsumedAttr>()) { if (!nullReturn.NullBB) nullReturn.init(CGF, arg0); @@ -6627,8 +6622,8 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF, callee = CGF.Builder.CreateBitCast(callee, MSI.MessengerType); RValue result = CGF.EmitCall(MSI.CallInfo, callee, returnSlot, args); - return nullReturn.complete(CGF, result, resultType, formalArgs, - requiresnullCheck ? method : 0); + return nullReturn.complete(CGF, result, resultType, formalArgs, + requiresnullCheck ? 
method : nullptr); } /// Generate code for a message send expression in the nonfragile abi. @@ -6652,49 +6647,56 @@ CGObjCNonFragileABIMac::GenerateMessageSend(CodeGen::CodeGenFunction &CGF, } llvm::GlobalVariable * -CGObjCNonFragileABIMac::GetClassGlobal(const std::string &Name) { +CGObjCNonFragileABIMac::GetClassGlobal(const std::string &Name, bool Weak) { + llvm::GlobalValue::LinkageTypes L = + Weak ? llvm::GlobalValue::ExternalWeakLinkage + : llvm::GlobalValue::ExternalLinkage; + llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name); - if (!GV) { + if (!GV) GV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABITy, - false, llvm::GlobalValue::ExternalLinkage, - 0, Name); - } + false, L, nullptr, Name); + assert(GV->getLinkage() == L); return GV; } llvm::Value *CGObjCNonFragileABIMac::EmitClassRefFromId(CodeGenFunction &CGF, - IdentifierInfo *II) { + IdentifierInfo *II, + bool Weak, + const ObjCInterfaceDecl *ID) { llvm::GlobalVariable *&Entry = ClassReferences[II]; if (!Entry) { - std::string ClassName(getClassSymbolPrefix() + II->getName().str()); - llvm::GlobalVariable *ClassGV = GetClassGlobal(ClassName); + std::string ClassName( + getClassSymbolPrefix() + + (ID ? ID->getObjCRuntimeNameAsString() : II->getName()).str()); + llvm::GlobalVariable *ClassGV = GetClassGlobal(ClassName, Weak); Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABIPtrTy, - false, llvm::GlobalValue::InternalLinkage, + false, llvm::GlobalValue::PrivateLinkage, ClassGV, "\01L_OBJC_CLASSLIST_REFERENCES_$_"); Entry->setAlignment( CGM.getDataLayout().getABITypeAlignment( ObjCTypes.ClassnfABIPtrTy)); Entry->setSection("__DATA, __objc_classrefs, regular, no_dead_strip"); - CGM.AddUsedGlobal(Entry); + CGM.addCompilerUsedGlobal(Entry); } - + assertPrivateName(Entry); return CGF.Builder.CreateLoad(Entry); } llvm::Value *CGObjCNonFragileABIMac::EmitClassRef(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID) { - return EmitClassRefFromId(CGF, ID->getIdentifier()); + return EmitClassRefFromId(CGF, ID->getIdentifier(), ID->isWeakImported(), ID); } llvm::Value *CGObjCNonFragileABIMac::EmitNSAutoreleasePoolClassRef( CodeGenFunction &CGF) { IdentifierInfo *II = &CGM.getContext().Idents.get("NSAutoreleasePool"); - return EmitClassRefFromId(CGF, II); + return EmitClassRefFromId(CGF, II, false, 0); } llvm::Value * @@ -6703,20 +6705,22 @@ CGObjCNonFragileABIMac::EmitSuperClassRef(CodeGenFunction &CGF, llvm::GlobalVariable *&Entry = SuperClassReferences[ID->getIdentifier()]; if (!Entry) { - std::string ClassName(getClassSymbolPrefix() + ID->getNameAsString()); - llvm::GlobalVariable *ClassGV = GetClassGlobal(ClassName); + llvm::SmallString<64> ClassName(getClassSymbolPrefix()); + ClassName += ID->getObjCRuntimeNameAsString(); + llvm::GlobalVariable *ClassGV = GetClassGlobal(ClassName.str(), + ID->isWeakImported()); Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABIPtrTy, - false, llvm::GlobalValue::InternalLinkage, + false, llvm::GlobalValue::PrivateLinkage, ClassGV, "\01L_OBJC_CLASSLIST_SUP_REFS_$_"); Entry->setAlignment( CGM.getDataLayout().getABITypeAlignment( ObjCTypes.ClassnfABIPtrTy)); Entry->setSection("__DATA, __objc_superrefs, regular, no_dead_strip"); - CGM.AddUsedGlobal(Entry); + CGM.addCompilerUsedGlobal(Entry); } - + assertPrivateName(Entry); return CGF.Builder.CreateLoad(Entry); } @@ -6724,25 +6728,27 @@ CGObjCNonFragileABIMac::EmitSuperClassRef(CodeGenFunction &CGF, /// meta-data /// llvm::Value *CGObjCNonFragileABIMac::EmitMetaClassRef(CodeGenFunction 
&CGF, - const ObjCInterfaceDecl *ID) { + const ObjCInterfaceDecl *ID, + bool Weak) { llvm::GlobalVariable * &Entry = MetaClassReferences[ID->getIdentifier()]; - if (Entry) - return CGF.Builder.CreateLoad(Entry); - - std::string MetaClassName(getMetaclassSymbolPrefix() + ID->getNameAsString()); - llvm::GlobalVariable *MetaClassGV = GetClassGlobal(MetaClassName); - Entry = - new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABIPtrTy, false, - llvm::GlobalValue::InternalLinkage, - MetaClassGV, - "\01L_OBJC_CLASSLIST_SUP_REFS_$_"); - Entry->setAlignment( - CGM.getDataLayout().getABITypeAlignment( - ObjCTypes.ClassnfABIPtrTy)); - - Entry->setSection("__DATA, __objc_superrefs, regular, no_dead_strip"); - CGM.AddUsedGlobal(Entry); + if (!Entry) { + llvm::SmallString<64> MetaClassName(getMetaclassSymbolPrefix()); + MetaClassName += ID->getObjCRuntimeNameAsString(); + llvm::GlobalVariable *MetaClassGV = + GetClassGlobal(MetaClassName.str(), Weak); + + Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABIPtrTy, + false, llvm::GlobalValue::PrivateLinkage, + MetaClassGV, + "\01L_OBJC_CLASSLIST_SUP_REFS_$_"); + Entry->setAlignment( + CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ClassnfABIPtrTy)); + + Entry->setSection("__DATA, __objc_superrefs, regular, no_dead_strip"); + CGM.addCompilerUsedGlobal(Entry); + } + assertPrivateName(Entry); return CGF.Builder.CreateLoad(Entry); } @@ -6751,9 +6757,11 @@ llvm::Value *CGObjCNonFragileABIMac::EmitMetaClassRef(CodeGenFunction &CGF, llvm::Value *CGObjCNonFragileABIMac::GetClass(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID) { if (ID->isWeakImported()) { - std::string ClassName(getClassSymbolPrefix() + ID->getNameAsString()); - llvm::GlobalVariable *ClassGV = GetClassGlobal(ClassName); - ClassGV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); + llvm::SmallString<64> ClassName(getClassSymbolPrefix()); + ClassName += ID->getObjCRuntimeNameAsString(); + llvm::GlobalVariable *ClassGV = GetClassGlobal(ClassName.str(), true); + (void)ClassGV; + assert(ClassGV->hasExternalWeakLinkage()); } return EmitClassRef(CGF, ID); @@ -6787,7 +6795,8 @@ CGObjCNonFragileABIMac::GenerateMessageSendSuper(CodeGen::CodeGenFunction &CGF, // If this is a class message the metaclass is passed as the target. llvm::Value *Target; if (IsClassMessage) - Target = EmitMetaClassRef(CGF, Class); + Target = EmitMetaClassRef(CGF, Class, + (isCategoryImpl && Class->isWeakImported())); else Target = EmitSuperClassRef(CGF, Class); @@ -6819,12 +6828,13 @@ llvm::Value *CGObjCNonFragileABIMac::EmitSelector(CodeGenFunction &CGF, ObjCTypes.SelectorPtrTy); Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.SelectorPtrTy, false, - llvm::GlobalValue::InternalLinkage, + llvm::GlobalValue::PrivateLinkage, Casted, "\01L_OBJC_SELECTOR_REFERENCES_"); Entry->setExternallyInitialized(true); Entry->setSection("__DATA, __objc_selrefs, literal_pointers, no_dead_strip"); - CGM.AddUsedGlobal(Entry); + CGM.addCompilerUsedGlobal(Entry); } + assertPrivateName(Entry); if (lval) return Entry; @@ -6969,7 +6979,7 @@ CGObjCNonFragileABIMac::GetEHType(QualType T) { new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false, llvm::GlobalValue::ExternalLinkage, - 0, "OBJC_EHTYPE_id"); + nullptr, "OBJC_EHTYPE_id"); return IDEHType; } @@ -7024,17 +7034,18 @@ CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID, // attribute, emit an external reference. 
if (hasObjCExceptionAttribute(CGM.getContext(), ID)) return Entry = - new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false, - llvm::GlobalValue::ExternalLinkage, - 0, - ("OBJC_EHTYPE_$_" + - ID->getIdentifier()->getName())); + new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false, + llvm::GlobalValue::ExternalLinkage, + nullptr, + ("OBJC_EHTYPE_$_" + + ID->getObjCRuntimeNameAsString())); } // Otherwise we need to either make a new entry or fill in the // initializer. assert((!Entry || !Entry->hasInitializer()) && "Duplicate EHType definition"); - std::string ClassName(getClassSymbolPrefix() + ID->getNameAsString()); + llvm::SmallString<64> ClassName(getClassSymbolPrefix()); + ClassName += ID->getObjCRuntimeNameAsString(); std::string VTableName = "objc_ehtype_vtable"; llvm::GlobalVariable *VTableGV = CGM.getModule().getGlobalVariable(VTableName); @@ -7042,39 +7053,42 @@ CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID, VTableGV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.Int8PtrTy, false, llvm::GlobalValue::ExternalLinkage, - 0, VTableName); + nullptr, VTableName); llvm::Value *VTableIdx = llvm::ConstantInt::get(CGM.Int32Ty, 2); llvm::Constant *Values[] = { llvm::ConstantExpr::getGetElementPtr(VTableGV, VTableIdx), - GetClassName(ID->getIdentifier()), - GetClassGlobal(ClassName) + GetClassName(ID->getObjCRuntimeNameAsString()), + GetClassGlobal(ClassName.str()) }; llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.EHTypeTy, Values); + llvm::GlobalValue::LinkageTypes L = ForDefinition + ? llvm::GlobalValue::ExternalLinkage + : llvm::GlobalValue::WeakAnyLinkage; if (Entry) { Entry->setInitializer(Init); } else { + llvm::SmallString<64> EHTYPEName("OBJC_EHTYPE_$_"); + EHTYPEName += ID->getObjCRuntimeNameAsString(); Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false, - llvm::GlobalValue::WeakAnyLinkage, + L, Init, - ("OBJC_EHTYPE_$_" + - ID->getIdentifier()->getName())); + EHTYPEName.str()); } + assert(Entry->getLinkage() == L); if (ID->getVisibility() == HiddenVisibility) Entry->setVisibility(llvm::GlobalValue::HiddenVisibility); Entry->setAlignment(CGM.getDataLayout().getABITypeAlignment( ObjCTypes.EHTypeTy)); - if (ForDefinition) { + if (ForDefinition) Entry->setSection("__DATA,__objc_const"); - Entry->setLinkage(llvm::GlobalValue::ExternalLinkage); - } else { + else Entry->setSection("__DATA,__datacoal_nt,coalesced"); - } return Entry; } diff --git a/lib/CodeGen/CGObjCRuntime.cpp b/lib/CodeGen/CGObjCRuntime.cpp index d097b6fad2c2..3d013da51e65 100644 --- a/lib/CodeGen/CGObjCRuntime.cpp +++ b/lib/CodeGen/CGObjCRuntime.cpp @@ -21,7 +21,7 @@ #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtObjC.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "llvm/Support/CallSite.h" +#include "llvm/IR/CallSite.h" using namespace clang; using namespace CodeGen; @@ -65,7 +65,7 @@ static uint64_t LookupFieldBitOffset(CodeGen::CodeGenModule &CGM, uint64_t CGObjCRuntime::ComputeIvarBaseOffset(CodeGen::CodeGenModule &CGM, const ObjCInterfaceDecl *OID, const ObjCIvarDecl *Ivar) { - return LookupFieldBitOffset(CGM, OID, 0, Ivar) / + return LookupFieldBitOffset(CGM, OID, nullptr, Ivar) / CGM.getContext().getCharWidth(); } @@ -116,7 +116,7 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF, // Note, there is a subtle invariant here: we can only call this routine on // non-synthesized ivars but we may be called for synthesized ivars. 
However, // a synthesized ivar can never be a bit-field, so this is safe. - uint64_t FieldBitOffset = LookupFieldBitOffset(CGF.CGM, OID, 0, Ivar); + uint64_t FieldBitOffset = LookupFieldBitOffset(CGF.CGM, OID, nullptr, Ivar); uint64_t BitOffset = FieldBitOffset % CGF.CGM.getContext().getCharWidth(); uint64_t AlignmentBits = CGF.CGM.getTarget().getCharAlign(); uint64_t BitFieldSize = Ivar->getBitWidthValue(CGF.getContext()); @@ -149,7 +149,7 @@ namespace { const VarDecl *Variable; const Stmt *Body; llvm::BasicBlock *Block; - llvm::Value *TypeInfo; + llvm::Constant *TypeInfo; }; struct CallObjCEndCatch : EHScopeStack::Cleanup { @@ -158,7 +158,7 @@ namespace { bool MightThrow; llvm::Value *Fn; - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { if (!MightThrow) { CGF.Builder.CreateCall(Fn)->setDoesNotThrow(); return; @@ -201,7 +201,7 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF, // @catch(...) always matches. if (!CatchDecl) { - Handler.TypeInfo = 0; // catch-all + Handler.TypeInfo = nullptr; // catch-all // Don't consider any other catches. break; } @@ -242,7 +242,7 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF, if (endCatchFn) { // Add a cleanup to leave the catch. - bool EndCatchMightThrow = (Handler.Variable == 0); + bool EndCatchMightThrow = (Handler.Variable == nullptr); CGF.EHStack.pushCleanup<CallObjCEndCatch>(NormalAndEHCleanup, EndCatchMightThrow, @@ -303,7 +303,7 @@ namespace { CallSyncExit(llvm::Value *SyncExitFn, llvm::Value *SyncArg) : SyncExitFn(SyncExitFn), SyncArg(SyncArg) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { CGF.Builder.CreateCall(SyncExitFn, SyncArg)->setDoesNotThrow(); } }; diff --git a/lib/CodeGen/CGObjCRuntime.h b/lib/CodeGen/CGObjCRuntime.h index 7f030f2341da..fc6bee3fabed 100644 --- a/lib/CodeGen/CGObjCRuntime.h +++ b/lib/CodeGen/CGObjCRuntime.h @@ -156,8 +156,8 @@ public: Selector Sel, llvm::Value *Receiver, const CallArgList &CallArgs, - const ObjCInterfaceDecl *Class = 0, - const ObjCMethodDecl *Method = 0) = 0; + const ObjCInterfaceDecl *Class = nullptr, + const ObjCMethodDecl *Method = nullptr) = 0; /// Generate an Objective-C message send operation to the super /// class initiated in a method for Class and with the given Self @@ -175,7 +175,7 @@ public: llvm::Value *Self, bool IsClassMessage, const CallArgList &CallArgs, - const ObjCMethodDecl *Method = 0) = 0; + const ObjCMethodDecl *Method = nullptr) = 0; /// Emit the code to return the named protocol as an object, as in a /// \@protocol expression. 
@@ -268,7 +268,8 @@ public: const CodeGen::CGBlockInfo &blockInfo) = 0; virtual llvm::Constant *BuildByrefLayout(CodeGen::CodeGenModule &CGM, QualType T) = 0; - virtual llvm::GlobalVariable *GetClassGlobal(const std::string &Name) = 0; + virtual llvm::GlobalVariable *GetClassGlobal(const std::string &Name, + bool Weak = false) = 0; struct MessageSendInfo { const CGFunctionInfo &CallInfo; diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp index 7c454ac7c695..079ef7234d9c 100644 --- a/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/lib/CodeGen/CGOpenCLRuntime.cpp @@ -33,32 +33,35 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) { assert(T->isOpenCLSpecificType() && "Not an OpenCL specific type!"); + llvm::LLVMContext& Ctx = CGM.getLLVMContext(); + uint32_t ImgAddrSpc = + CGM.getContext().getTargetAddressSpace(LangAS::opencl_global); switch (cast<BuiltinType>(T)->getKind()) { default: llvm_unreachable("Unexpected opencl builtin type!"); - return 0; + return nullptr; case BuiltinType::OCLImage1d: return llvm::PointerType::get(llvm::StructType::create( - CGM.getLLVMContext(), "opencl.image1d_t"), 0); + Ctx, "opencl.image1d_t"), ImgAddrSpc); case BuiltinType::OCLImage1dArray: return llvm::PointerType::get(llvm::StructType::create( - CGM.getLLVMContext(), "opencl.image1d_array_t"), 0); + Ctx, "opencl.image1d_array_t"), ImgAddrSpc); case BuiltinType::OCLImage1dBuffer: return llvm::PointerType::get(llvm::StructType::create( - CGM.getLLVMContext(), "opencl.image1d_buffer_t"), 0); + Ctx, "opencl.image1d_buffer_t"), ImgAddrSpc); case BuiltinType::OCLImage2d: return llvm::PointerType::get(llvm::StructType::create( - CGM.getLLVMContext(), "opencl.image2d_t"), 0); + Ctx, "opencl.image2d_t"), ImgAddrSpc); case BuiltinType::OCLImage2dArray: return llvm::PointerType::get(llvm::StructType::create( - CGM.getLLVMContext(), "opencl.image2d_array_t"), 0); + Ctx, "opencl.image2d_array_t"), ImgAddrSpc); case BuiltinType::OCLImage3d: return llvm::PointerType::get(llvm::StructType::create( - CGM.getLLVMContext(), "opencl.image3d_t"), 0); + Ctx, "opencl.image3d_t"), ImgAddrSpc); case BuiltinType::OCLSampler: - return llvm::IntegerType::get(CGM.getLLVMContext(),32); + return llvm::IntegerType::get(Ctx, 32); case BuiltinType::OCLEvent: return llvm::PointerType::get(llvm::StructType::create( - CGM.getLLVMContext(), "opencl.event_t"), 0); + Ctx, "opencl.event_t"), 0); } } diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp new file mode 100644 index 000000000000..12a3a7790eec --- /dev/null +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -0,0 +1,182 @@ +//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This provides a class for OpenMP runtime code generation. 
+// +//===----------------------------------------------------------------------===// + +#include "CGOpenMPRuntime.h" +#include "CodeGenFunction.h" +#include "clang/AST/Decl.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> + +using namespace clang; +using namespace CodeGen; + +CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) + : CGM(CGM), DefaultOpenMPPSource(nullptr) { + IdentTy = llvm::StructType::create( + "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, + CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, + CGM.Int8PtrTy /* psource */, NULL); + // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) + llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), + llvm::PointerType::getUnqual(CGM.Int32Ty)}; + Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); +} + +llvm::Value * +CGOpenMPRuntime::GetOrCreateDefaultOpenMPLocation(OpenMPLocationFlags Flags) { + llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); + if (!Entry) { + if (!DefaultOpenMPPSource) { + // Initialize default location for psource field of ident_t structure of + // all ident_t objects. Format is ";file;function;line;column;;". + // Taken from + // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c + DefaultOpenMPPSource = + CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;"); + DefaultOpenMPPSource = + llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); + } + llvm::GlobalVariable *DefaultOpenMPLocation = cast<llvm::GlobalVariable>( + CGM.CreateRuntimeVariable(IdentTy, ".kmpc_default_loc.addr")); + DefaultOpenMPLocation->setUnnamedAddr(true); + DefaultOpenMPLocation->setConstant(true); + DefaultOpenMPLocation->setLinkage(llvm::GlobalValue::PrivateLinkage); + + llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true); + llvm::Constant *Values[] = {Zero, + llvm::ConstantInt::get(CGM.Int32Ty, Flags), + Zero, Zero, DefaultOpenMPPSource}; + llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values); + DefaultOpenMPLocation->setInitializer(Init); + return DefaultOpenMPLocation; + } + return Entry; +} + +llvm::Value *CGOpenMPRuntime::EmitOpenMPUpdateLocation( + CodeGenFunction &CGF, SourceLocation Loc, OpenMPLocationFlags Flags) { + // If no debug info is generated - return global default location. 
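For the psource format documented just above (";file;function;line;column;;"), the constructor-installed default is ";unknown;unknown;0;0;;", and EmitOpenMPUpdateLocation below fills in the real variant; for example, a construct at a hypothetical location test.c:12:3 inside foo() would be encoded as ";test.c;foo;12;3;;". A standalone restatement of that format, illustrative only and not Clang API:

#include <cstdio>
#include <string>

// Compose the semicolon-separated location string stored in ident_t::psource.
static std::string makePSource(const std::string &File, const std::string &Func,
                               unsigned Line, unsigned Col) {
  return ";" + File + ";" + Func + ";" + std::to_string(Line) + ";" +
         std::to_string(Col) + ";;";
}

int main() {
  std::printf("%s\n", makePSource("test.c", "foo", 12, 3).c_str()); // ;test.c;foo;12;3;;
  return 0;
}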
+ if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo || + Loc.isInvalid()) + return GetOrCreateDefaultOpenMPLocation(Flags); + + assert(CGF.CurFn && "No function in current CodeGenFunction."); + + llvm::Value *LocValue = nullptr; + OpenMPLocMapTy::iterator I = OpenMPLocMap.find(CGF.CurFn); + if (I != OpenMPLocMap.end()) { + LocValue = I->second; + } else { + // Generate "ident_t .kmpc_loc.addr;" + llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr"); + AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy)); + OpenMPLocMap[CGF.CurFn] = AI; + LocValue = AI; + + CGBuilderTy::InsertPointGuard IPG(CGF.Builder); + CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); + CGF.Builder.CreateMemCpy(LocValue, GetOrCreateDefaultOpenMPLocation(Flags), + llvm::ConstantExpr::getSizeOf(IdentTy), + CGM.PointerAlignInBytes); + } + + // char **psource = &.kmpc_loc_<flags>.addr.psource; + llvm::Value *PSource = + CGF.Builder.CreateConstInBoundsGEP2_32(LocValue, 0, IdentField_PSource); + + auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); + if (OMPDebugLoc == nullptr) { + SmallString<128> Buffer2; + llvm::raw_svector_ostream OS2(Buffer2); + // Build debug location + PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); + OS2 << ";" << PLoc.getFilename() << ";"; + if (const FunctionDecl *FD = + dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { + OS2 << FD->getQualifiedNameAsString(); + } + OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; + OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); + OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; + } + // *psource = ";<File>;<Function>;<Line>;<Column>;;"; + CGF.Builder.CreateStore(OMPDebugLoc, PSource); + + return LocValue; +} + +llvm::Value *CGOpenMPRuntime::GetOpenMPGlobalThreadNum(CodeGenFunction &CGF, + SourceLocation Loc) { + assert(CGF.CurFn && "No function in current CodeGenFunction."); + + llvm::Value *GTid = nullptr; + OpenMPGtidMapTy::iterator I = OpenMPGtidMap.find(CGF.CurFn); + if (I != OpenMPGtidMap.end()) { + GTid = I->second; + } else { + // Generate "int32 .kmpc_global_thread_num.addr;" + CGBuilderTy::InsertPointGuard IPG(CGF.Builder); + CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); + llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc)}; + GTid = CGF.EmitRuntimeCall( + CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num), Args); + OpenMPGtidMap[CGF.CurFn] = GTid; + } + return GTid; +} + +void CGOpenMPRuntime::FunctionFinished(CodeGenFunction &CGF) { + assert(CGF.CurFn && "No function in current CodeGenFunction."); + if (OpenMPGtidMap.count(CGF.CurFn)) + OpenMPGtidMap.erase(CGF.CurFn); + if (OpenMPLocMap.count(CGF.CurFn)) + OpenMPLocMap.erase(CGF.CurFn); +} + +llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { + return llvm::PointerType::getUnqual(IdentTy); +} + +llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { + return llvm::PointerType::getUnqual(Kmpc_MicroTy); +} + +llvm::Constant * +CGOpenMPRuntime::CreateRuntimeFunction(OpenMPRTLFunction Function) { + llvm::Constant *RTLFn = nullptr; + switch (Function) { + case OMPRTL__kmpc_fork_call: { + // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro + // microtask, ...); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + getKmpc_MicroPointerTy()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, true); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); + break; + } + case 
OMPRTL__kmpc_global_thread_num: { + // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); + llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); + break; + } + } + return RTLFn; +} diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h new file mode 100644 index 000000000000..862e8a148c56 --- /dev/null +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -0,0 +1,177 @@ +//===----- CGOpenMPRuntime.h - Interface to OpenMP Runtimes -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This provides a class for OpenMP runtime code generation. +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_CODEGEN_OPENMPRUNTIME_H +#define CLANG_CODEGEN_OPENMPRUNTIME_H + +#include "clang/AST/Type.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" + +namespace llvm { +class AllocaInst; +class CallInst; +class GlobalVariable; +class Constant; +class Function; +class Module; +class StructLayout; +class FunctionType; +class StructType; +class Type; +class Value; +} // namespace llvm + +namespace clang { + +namespace CodeGen { + +class CodeGenFunction; +class CodeGenModule; + +class CGOpenMPRuntime { +public: + /// \brief Values for bit flags used in the ident_t to describe the fields. + /// All enumeric elements are named and described in accordance with the code + /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h + enum OpenMPLocationFlags { + /// \brief Use trampoline for internal microtask. + OMP_IDENT_IMD = 0x01, + /// \brief Use c-style ident structure. + OMP_IDENT_KMPC = 0x02, + /// \brief Atomic reduction option for kmpc_reduce. + OMP_ATOMIC_REDUCE = 0x10, + /// \brief Explicit 'barrier' directive. + OMP_IDENT_BARRIER_EXPL = 0x20, + /// \brief Implicit barrier in code. + OMP_IDENT_BARRIER_IMPL = 0x40, + /// \brief Implicit barrier in 'for' directive. + OMP_IDENT_BARRIER_IMPL_FOR = 0x40, + /// \brief Implicit barrier in 'sections' directive. + OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, + /// \brief Implicit barrier in 'single' directive. + OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140 + }; + enum OpenMPRTLFunction { + // Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro + // microtask, ...); + OMPRTL__kmpc_fork_call, + // Call to kmp_int32 kmpc_global_thread_num(ident_t *loc); + OMPRTL__kmpc_global_thread_num + }; + +private: + CodeGenModule &CGM; + /// \brief Default const ident_t object used for initialization of all other + /// ident_t objects. + llvm::Constant *DefaultOpenMPPSource; + /// \brief Map of flags and corrsponding default locations. + typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDefaultLocMapTy; + OpenMPDefaultLocMapTy OpenMPDefaultLocMap; + llvm::Value *GetOrCreateDefaultOpenMPLocation(OpenMPLocationFlags Flags); + /// \brief Describes ident structure that describes a source location. 
+ /// All descriptions are taken from + /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h + /// Original structure: + /// typedef struct ident { + /// kmp_int32 reserved_1; /**< might be used in Fortran; + /// see above */ + /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; + /// KMP_IDENT_KMPC identifies this union + /// member */ + /// kmp_int32 reserved_2; /**< not really used in Fortran any more; + /// see above */ + ///#if USE_ITT_BUILD + /// /* but currently used for storing + /// region-specific ITT */ + /// /* contextual information. */ + ///#endif /* USE_ITT_BUILD */ + /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for + /// C++ */ + /// char const *psource; /**< String describing the source location. + /// The string is composed of semi-colon separated + // fields which describe the source file, + /// the function and a pair of line numbers that + /// delimit the construct. + /// */ + /// } ident_t; + enum IdentFieldIndex { + /// \brief might be used in Fortran + IdentField_Reserved_1, + /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. + IdentField_Flags, + /// \brief Not really used in Fortran any more + IdentField_Reserved_2, + /// \brief Source[4] in Fortran, do not use for C++ + IdentField_Reserved_3, + /// \brief String describing the source location. The string is composed of + /// semi-colon separated fields which describe the source file, the function + /// and a pair of line numbers that delimit the construct. + IdentField_PSource + }; + llvm::StructType *IdentTy; + /// \brief Map for Sourcelocation and OpenMP runtime library debug locations. + typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDebugLocMapTy; + OpenMPDebugLocMapTy OpenMPDebugLocMap; + /// \brief The type for a microtask which gets passed to __kmpc_fork_call(). + /// Original representation is: + /// typedef void (kmpc_micro)(kmp_int32 global_tid, kmp_int32 bound_tid,...); + llvm::FunctionType *Kmpc_MicroTy; + /// \brief Map of local debug location and functions. + typedef llvm::DenseMap<llvm::Function *, llvm::Value *> OpenMPLocMapTy; + OpenMPLocMapTy OpenMPLocMap; + /// \brief Map of local gtid and functions. + typedef llvm::DenseMap<llvm::Function *, llvm::Value *> OpenMPGtidMapTy; + OpenMPGtidMapTy OpenMPGtidMap; + +public: + explicit CGOpenMPRuntime(CodeGenModule &CGM); + ~CGOpenMPRuntime() {} + + /// \brief Cleans up references to the objects in finished function. + /// \param CGF Reference to finished CodeGenFunction. + /// + void FunctionFinished(CodeGenFunction &CGF); + + /// \brief Emits object of ident_t type with info for source location. + /// \param CGF Reference to current CodeGenFunction. + /// \param Loc Clang source location. + /// \param Flags Flags for OpenMP location. + /// + llvm::Value * + EmitOpenMPUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPLocationFlags Flags = OMP_IDENT_KMPC); + + /// \brief Generates global thread number value. + /// \param CGF Reference to current CodeGenFunction. + /// \param Loc Clang source location. + /// + llvm::Value *GetOpenMPGlobalThreadNum(CodeGenFunction &CGF, + SourceLocation Loc); + + /// \brief Returns pointer to ident_t type; + llvm::Type *getIdentTyPointerTy(); + + /// \brief Returns pointer to kmpc_micro type; + llvm::Type *getKmpc_MicroPointerTy(); + + /// \brief Returns specified OpenMP runtime function. + /// \param Function OpenMP runtime function. + /// \return Specified function. 
+ llvm::Constant *CreateRuntimeFunction(OpenMPRTLFunction Function); +}; +} // namespace CodeGen +} // namespace clang + +#endif diff --git a/lib/CodeGen/CGRTTI.cpp b/lib/CodeGen/CGRTTI.cpp deleted file mode 100644 index aa687b956098..000000000000 --- a/lib/CodeGen/CGRTTI.cpp +++ /dev/null @@ -1,977 +0,0 @@ -//===--- CGCXXRTTI.cpp - Emit LLVM Code for C++ RTTI descriptors ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This contains code dealing with C++ code generation of RTTI descriptors. -// -//===----------------------------------------------------------------------===// - -#include "CodeGenModule.h" -#include "CGCXXABI.h" -#include "CGObjCRuntime.h" -#include "clang/AST/RecordLayout.h" -#include "clang/AST/Type.h" -#include "clang/Frontend/CodeGenOptions.h" - -using namespace clang; -using namespace CodeGen; - -namespace { -class RTTIBuilder { - CodeGenModule &CGM; // Per-module state. - llvm::LLVMContext &VMContext; - - /// Fields - The fields of the RTTI descriptor currently being built. - SmallVector<llvm::Constant *, 16> Fields; - - /// GetAddrOfTypeName - Returns the mangled type name of the given type. - llvm::GlobalVariable * - GetAddrOfTypeName(QualType Ty, llvm::GlobalVariable::LinkageTypes Linkage); - - /// GetAddrOfExternalRTTIDescriptor - Returns the constant for the RTTI - /// descriptor of the given type. - llvm::Constant *GetAddrOfExternalRTTIDescriptor(QualType Ty); - - /// BuildVTablePointer - Build the vtable pointer for the given type. - void BuildVTablePointer(const Type *Ty); - - /// BuildSIClassTypeInfo - Build an abi::__si_class_type_info, used for single - /// inheritance, according to the Itanium C++ ABI, 2.9.5p6b. - void BuildSIClassTypeInfo(const CXXRecordDecl *RD); - - /// BuildVMIClassTypeInfo - Build an abi::__vmi_class_type_info, used for - /// classes with bases that do not satisfy the abi::__si_class_type_info - /// constraints, according ti the Itanium C++ ABI, 2.9.5p5c. - void BuildVMIClassTypeInfo(const CXXRecordDecl *RD); - - /// BuildPointerTypeInfo - Build an abi::__pointer_type_info struct, used - /// for pointer types. - void BuildPointerTypeInfo(QualType PointeeTy); - - /// BuildObjCObjectTypeInfo - Build the appropriate kind of - /// type_info for an object type. - void BuildObjCObjectTypeInfo(const ObjCObjectType *Ty); - - /// BuildPointerToMemberTypeInfo - Build an abi::__pointer_to_member_type_info - /// struct, used for member pointer types. - void BuildPointerToMemberTypeInfo(const MemberPointerType *Ty); - -public: - RTTIBuilder(CodeGenModule &CGM) : CGM(CGM), - VMContext(CGM.getModule().getContext()) { } - - // Pointer type info flags. - enum { - /// PTI_Const - Type has const qualifier. - PTI_Const = 0x1, - - /// PTI_Volatile - Type has volatile qualifier. - PTI_Volatile = 0x2, - - /// PTI_Restrict - Type has restrict qualifier. - PTI_Restrict = 0x4, - - /// PTI_Incomplete - Type is incomplete. - PTI_Incomplete = 0x8, - - /// PTI_ContainingClassIncomplete - Containing class is incomplete. - /// (in pointer to member). - PTI_ContainingClassIncomplete = 0x10 - }; - - // VMI type info flags. - enum { - /// VMI_NonDiamondRepeat - Class has non-diamond repeated inheritance. - VMI_NonDiamondRepeat = 0x1, - - /// VMI_DiamondShaped - Class is diamond shaped. - VMI_DiamondShaped = 0x2 - }; - - // Base class type info flags. 
- enum { - /// BCTI_Virtual - Base class is virtual. - BCTI_Virtual = 0x1, - - /// BCTI_Public - Base class is public. - BCTI_Public = 0x2 - }; - - /// BuildTypeInfo - Build the RTTI type info struct for the given type. - /// - /// \param Force - true to force the creation of this RTTI value - llvm::Constant *BuildTypeInfo(QualType Ty, bool Force = false); -}; -} - -llvm::GlobalVariable * -RTTIBuilder::GetAddrOfTypeName(QualType Ty, - llvm::GlobalVariable::LinkageTypes Linkage) { - SmallString<256> OutName; - llvm::raw_svector_ostream Out(OutName); - CGM.getCXXABI().getMangleContext().mangleCXXRTTIName(Ty, Out); - Out.flush(); - StringRef Name = OutName.str(); - - // We know that the mangled name of the type starts at index 4 of the - // mangled name of the typename, so we can just index into it in order to - // get the mangled name of the type. - llvm::Constant *Init = llvm::ConstantDataArray::getString(VMContext, - Name.substr(4)); - - llvm::GlobalVariable *GV = - CGM.CreateOrReplaceCXXRuntimeVariable(Name, Init->getType(), Linkage); - - GV->setInitializer(Init); - - return GV; -} - -llvm::Constant *RTTIBuilder::GetAddrOfExternalRTTIDescriptor(QualType Ty) { - // Mangle the RTTI name. - SmallString<256> OutName; - llvm::raw_svector_ostream Out(OutName); - CGM.getCXXABI().getMangleContext().mangleCXXRTTI(Ty, Out); - Out.flush(); - StringRef Name = OutName.str(); - - // Look for an existing global. - llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(Name); - - if (!GV) { - // Create a new global variable. - GV = new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy, - /*Constant=*/true, - llvm::GlobalValue::ExternalLinkage, 0, Name); - } - - return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy); -} - -/// TypeInfoIsInStandardLibrary - Given a builtin type, returns whether the type -/// info for that type is defined in the standard library. -static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { - // Itanium C++ ABI 2.9.2: - // Basic type information (e.g. for "int", "bool", etc.) will be kept in - // the run-time support library. Specifically, the run-time support - // library should contain type_info objects for the types X, X* and - // X const*, for every X in: void, std::nullptr_t, bool, wchar_t, char, - // unsigned char, signed char, short, unsigned short, int, unsigned int, - // long, unsigned long, long long, unsigned long long, float, double, - // long double, char16_t, char32_t, and the IEEE 754r decimal and - // half-precision floating point types. 
- switch (Ty->getKind()) { - case BuiltinType::Void: - case BuiltinType::NullPtr: - case BuiltinType::Bool: - case BuiltinType::WChar_S: - case BuiltinType::WChar_U: - case BuiltinType::Char_U: - case BuiltinType::Char_S: - case BuiltinType::UChar: - case BuiltinType::SChar: - case BuiltinType::Short: - case BuiltinType::UShort: - case BuiltinType::Int: - case BuiltinType::UInt: - case BuiltinType::Long: - case BuiltinType::ULong: - case BuiltinType::LongLong: - case BuiltinType::ULongLong: - case BuiltinType::Half: - case BuiltinType::Float: - case BuiltinType::Double: - case BuiltinType::LongDouble: - case BuiltinType::Char16: - case BuiltinType::Char32: - case BuiltinType::Int128: - case BuiltinType::UInt128: - case BuiltinType::OCLImage1d: - case BuiltinType::OCLImage1dArray: - case BuiltinType::OCLImage1dBuffer: - case BuiltinType::OCLImage2d: - case BuiltinType::OCLImage2dArray: - case BuiltinType::OCLImage3d: - case BuiltinType::OCLSampler: - case BuiltinType::OCLEvent: - return true; - - case BuiltinType::Dependent: -#define BUILTIN_TYPE(Id, SingletonId) -#define PLACEHOLDER_TYPE(Id, SingletonId) \ - case BuiltinType::Id: -#include "clang/AST/BuiltinTypes.def" - llvm_unreachable("asking for RRTI for a placeholder type!"); - - case BuiltinType::ObjCId: - case BuiltinType::ObjCClass: - case BuiltinType::ObjCSel: - llvm_unreachable("FIXME: Objective-C types are unsupported!"); - } - - llvm_unreachable("Invalid BuiltinType Kind!"); -} - -static bool TypeInfoIsInStandardLibrary(const PointerType *PointerTy) { - QualType PointeeTy = PointerTy->getPointeeType(); - const BuiltinType *BuiltinTy = dyn_cast<BuiltinType>(PointeeTy); - if (!BuiltinTy) - return false; - - // Check the qualifiers. - Qualifiers Quals = PointeeTy.getQualifiers(); - Quals.removeConst(); - - if (!Quals.empty()) - return false; - - return TypeInfoIsInStandardLibrary(BuiltinTy); -} - -/// IsStandardLibraryRTTIDescriptor - Returns whether the type -/// information for the given type exists in the standard library. -static bool IsStandardLibraryRTTIDescriptor(QualType Ty) { - // Type info for builtin types is defined in the standard library. - if (const BuiltinType *BuiltinTy = dyn_cast<BuiltinType>(Ty)) - return TypeInfoIsInStandardLibrary(BuiltinTy); - - // Type info for some pointer types to builtin types is defined in the - // standard library. - if (const PointerType *PointerTy = dyn_cast<PointerType>(Ty)) - return TypeInfoIsInStandardLibrary(PointerTy); - - return false; -} - -/// ShouldUseExternalRTTIDescriptor - Returns whether the type information for -/// the given type exists somewhere else, and that we should not emit the type -/// information in this translation unit. Assumes that it is not a -/// standard-library type. -static bool ShouldUseExternalRTTIDescriptor(CodeGenModule &CGM, - QualType Ty) { - ASTContext &Context = CGM.getContext(); - - // If RTTI is disabled, assume it might be disabled in the - // translation unit that defines any potential key function, too. - if (!Context.getLangOpts().RTTI) return false; - - if (const RecordType *RecordTy = dyn_cast<RecordType>(Ty)) { - const CXXRecordDecl *RD = cast<CXXRecordDecl>(RecordTy->getDecl()); - if (!RD->hasDefinition()) - return false; - - if (!RD->isDynamicClass()) - return false; - - // FIXME: this may need to be reconsidered if the key function - // changes. - return CGM.getVTables().isVTableExternal(RD); - } - - return false; -} - -/// IsIncompleteClassType - Returns whether the given record type is incomplete. 
-static bool IsIncompleteClassType(const RecordType *RecordTy) { - return !RecordTy->getDecl()->isCompleteDefinition(); -} - -/// ContainsIncompleteClassType - Returns whether the given type contains an -/// incomplete class type. This is true if -/// -/// * The given type is an incomplete class type. -/// * The given type is a pointer type whose pointee type contains an -/// incomplete class type. -/// * The given type is a member pointer type whose class is an incomplete -/// class type. -/// * The given type is a member pointer type whoise pointee type contains an -/// incomplete class type. -/// is an indirect or direct pointer to an incomplete class type. -static bool ContainsIncompleteClassType(QualType Ty) { - if (const RecordType *RecordTy = dyn_cast<RecordType>(Ty)) { - if (IsIncompleteClassType(RecordTy)) - return true; - } - - if (const PointerType *PointerTy = dyn_cast<PointerType>(Ty)) - return ContainsIncompleteClassType(PointerTy->getPointeeType()); - - if (const MemberPointerType *MemberPointerTy = - dyn_cast<MemberPointerType>(Ty)) { - // Check if the class type is incomplete. - const RecordType *ClassType = cast<RecordType>(MemberPointerTy->getClass()); - if (IsIncompleteClassType(ClassType)) - return true; - - return ContainsIncompleteClassType(MemberPointerTy->getPointeeType()); - } - - return false; -} - -/// getTypeInfoLinkage - Return the linkage that the type info and type info -/// name constants should have for the given type. -static llvm::GlobalVariable::LinkageTypes -getTypeInfoLinkage(CodeGenModule &CGM, QualType Ty) { - // Itanium C++ ABI 2.9.5p7: - // In addition, it and all of the intermediate abi::__pointer_type_info - // structs in the chain down to the abi::__class_type_info for the - // incomplete class type must be prevented from resolving to the - // corresponding type_info structs for the complete class type, possibly - // by making them local static objects. Finally, a dummy class RTTI is - // generated for the incomplete type that will not resolve to the final - // complete class RTTI (because the latter need not exist), possibly by - // making it a local static object. - if (ContainsIncompleteClassType(Ty)) - return llvm::GlobalValue::InternalLinkage; - - switch (Ty->getLinkage()) { - case NoLinkage: - case VisibleNoLinkage: - case InternalLinkage: - case UniqueExternalLinkage: - return llvm::GlobalValue::InternalLinkage; - - case ExternalLinkage: - if (!CGM.getLangOpts().RTTI) { - // RTTI is not enabled, which means that this type info struct is going - // to be used for exception handling. Give it linkonce_odr linkage. - return llvm::GlobalValue::LinkOnceODRLinkage; - } - - if (const RecordType *Record = dyn_cast<RecordType>(Ty)) { - const CXXRecordDecl *RD = cast<CXXRecordDecl>(Record->getDecl()); - if (RD->hasAttr<WeakAttr>()) - return llvm::GlobalValue::WeakODRLinkage; - if (RD->isDynamicClass()) - return CGM.getVTableLinkage(RD); - } - - return llvm::GlobalValue::LinkOnceODRLinkage; - } - - llvm_unreachable("Invalid linkage!"); -} - -// CanUseSingleInheritance - Return whether the given record decl has a "single, -// public, non-virtual base at offset zero (i.e. the derived class is dynamic -// iff the base is)", according to Itanium C++ ABI, 2.95p6b. -static bool CanUseSingleInheritance(const CXXRecordDecl *RD) { - // Check the number of bases. - if (RD->getNumBases() != 1) - return false; - - // Get the base. - CXXRecordDecl::base_class_const_iterator Base = RD->bases_begin(); - - // Check that the base is not virtual. 
- if (Base->isVirtual()) - return false; - - // Check that the base is public. - if (Base->getAccessSpecifier() != AS_public) - return false; - - // Check that the class is dynamic iff the base is. - const CXXRecordDecl *BaseDecl = - cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl()); - if (!BaseDecl->isEmpty() && - BaseDecl->isDynamicClass() != RD->isDynamicClass()) - return false; - - return true; -} - -void RTTIBuilder::BuildVTablePointer(const Type *Ty) { - // abi::__class_type_info. - static const char * const ClassTypeInfo = - "_ZTVN10__cxxabiv117__class_type_infoE"; - // abi::__si_class_type_info. - static const char * const SIClassTypeInfo = - "_ZTVN10__cxxabiv120__si_class_type_infoE"; - // abi::__vmi_class_type_info. - static const char * const VMIClassTypeInfo = - "_ZTVN10__cxxabiv121__vmi_class_type_infoE"; - - const char *VTableName = 0; - - switch (Ty->getTypeClass()) { -#define TYPE(Class, Base) -#define ABSTRACT_TYPE(Class, Base) -#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class: -#define NON_CANONICAL_TYPE(Class, Base) case Type::Class: -#define DEPENDENT_TYPE(Class, Base) case Type::Class: -#include "clang/AST/TypeNodes.def" - llvm_unreachable("Non-canonical and dependent types shouldn't get here"); - - case Type::LValueReference: - case Type::RValueReference: - llvm_unreachable("References shouldn't get here"); - - case Type::Auto: - llvm_unreachable("Undeduced auto type shouldn't get here"); - - case Type::Builtin: - // GCC treats vector and complex types as fundamental types. - case Type::Vector: - case Type::ExtVector: - case Type::Complex: - case Type::Atomic: - // FIXME: GCC treats block pointers as fundamental types?! - case Type::BlockPointer: - // abi::__fundamental_type_info. - VTableName = "_ZTVN10__cxxabiv123__fundamental_type_infoE"; - break; - - case Type::ConstantArray: - case Type::IncompleteArray: - case Type::VariableArray: - // abi::__array_type_info. - VTableName = "_ZTVN10__cxxabiv117__array_type_infoE"; - break; - - case Type::FunctionNoProto: - case Type::FunctionProto: - // abi::__function_type_info. - VTableName = "_ZTVN10__cxxabiv120__function_type_infoE"; - break; - - case Type::Enum: - // abi::__enum_type_info. - VTableName = "_ZTVN10__cxxabiv116__enum_type_infoE"; - break; - - case Type::Record: { - const CXXRecordDecl *RD = - cast<CXXRecordDecl>(cast<RecordType>(Ty)->getDecl()); - - if (!RD->hasDefinition() || !RD->getNumBases()) { - VTableName = ClassTypeInfo; - } else if (CanUseSingleInheritance(RD)) { - VTableName = SIClassTypeInfo; - } else { - VTableName = VMIClassTypeInfo; - } - - break; - } - - case Type::ObjCObject: - // Ignore protocol qualifiers. - Ty = cast<ObjCObjectType>(Ty)->getBaseType().getTypePtr(); - - // Handle id and Class. - if (isa<BuiltinType>(Ty)) { - VTableName = ClassTypeInfo; - break; - } - - assert(isa<ObjCInterfaceType>(Ty)); - // Fall through. - - case Type::ObjCInterface: - if (cast<ObjCInterfaceType>(Ty)->getDecl()->getSuperClass()) { - VTableName = SIClassTypeInfo; - } else { - VTableName = ClassTypeInfo; - } - break; - - case Type::ObjCObjectPointer: - case Type::Pointer: - // abi::__pointer_type_info. - VTableName = "_ZTVN10__cxxabiv119__pointer_type_infoE"; - break; - - case Type::MemberPointer: - // abi::__pointer_to_member_type_info. 
- VTableName = "_ZTVN10__cxxabiv129__pointer_to_member_type_infoE"; - break; - } - - llvm::Constant *VTable = - CGM.getModule().getOrInsertGlobal(VTableName, CGM.Int8PtrTy); - - llvm::Type *PtrDiffTy = - CGM.getTypes().ConvertType(CGM.getContext().getPointerDiffType()); - - // The vtable address point is 2. - llvm::Constant *Two = llvm::ConstantInt::get(PtrDiffTy, 2); - VTable = llvm::ConstantExpr::getInBoundsGetElementPtr(VTable, Two); - VTable = llvm::ConstantExpr::getBitCast(VTable, CGM.Int8PtrTy); - - Fields.push_back(VTable); -} - -llvm::Constant *RTTIBuilder::BuildTypeInfo(QualType Ty, bool Force) { - // We want to operate on the canonical type. - Ty = CGM.getContext().getCanonicalType(Ty); - - // Check if we've already emitted an RTTI descriptor for this type. - SmallString<256> OutName; - llvm::raw_svector_ostream Out(OutName); - CGM.getCXXABI().getMangleContext().mangleCXXRTTI(Ty, Out); - Out.flush(); - StringRef Name = OutName.str(); - - llvm::GlobalVariable *OldGV = CGM.getModule().getNamedGlobal(Name); - if (OldGV && !OldGV->isDeclaration()) { - assert(!OldGV->hasAvailableExternallyLinkage() && - "available_externally typeinfos not yet implemented"); - - return llvm::ConstantExpr::getBitCast(OldGV, CGM.Int8PtrTy); - } - - // Check if there is already an external RTTI descriptor for this type. - bool IsStdLib = IsStandardLibraryRTTIDescriptor(Ty); - if (!Force && (IsStdLib || ShouldUseExternalRTTIDescriptor(CGM, Ty))) - return GetAddrOfExternalRTTIDescriptor(Ty); - - // Emit the standard library with external linkage. - llvm::GlobalVariable::LinkageTypes Linkage; - if (IsStdLib) - Linkage = llvm::GlobalValue::ExternalLinkage; - else - Linkage = getTypeInfoLinkage(CGM, Ty); - - // Add the vtable pointer. - BuildVTablePointer(cast<Type>(Ty)); - - // And the name. - llvm::GlobalVariable *TypeName = GetAddrOfTypeName(Ty, Linkage); - - Fields.push_back(llvm::ConstantExpr::getBitCast(TypeName, CGM.Int8PtrTy)); - - switch (Ty->getTypeClass()) { -#define TYPE(Class, Base) -#define ABSTRACT_TYPE(Class, Base) -#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class: -#define NON_CANONICAL_TYPE(Class, Base) case Type::Class: -#define DEPENDENT_TYPE(Class, Base) case Type::Class: -#include "clang/AST/TypeNodes.def" - llvm_unreachable("Non-canonical and dependent types shouldn't get here"); - - // GCC treats vector types as fundamental types. - case Type::Builtin: - case Type::Vector: - case Type::ExtVector: - case Type::Complex: - case Type::BlockPointer: - // Itanium C++ ABI 2.9.5p4: - // abi::__fundamental_type_info adds no data members to std::type_info. - break; - - case Type::LValueReference: - case Type::RValueReference: - llvm_unreachable("References shouldn't get here"); - - case Type::Auto: - llvm_unreachable("Undeduced auto type shouldn't get here"); - - case Type::ConstantArray: - case Type::IncompleteArray: - case Type::VariableArray: - // Itanium C++ ABI 2.9.5p5: - // abi::__array_type_info adds no data members to std::type_info. - break; - - case Type::FunctionNoProto: - case Type::FunctionProto: - // Itanium C++ ABI 2.9.5p5: - // abi::__function_type_info adds no data members to std::type_info. - break; - - case Type::Enum: - // Itanium C++ ABI 2.9.5p5: - // abi::__enum_type_info adds no data members to std::type_info. - break; - - case Type::Record: { - const CXXRecordDecl *RD = - cast<CXXRecordDecl>(cast<RecordType>(Ty)->getDecl()); - if (!RD->hasDefinition() || !RD->getNumBases()) { - // We don't need to emit any fields. 
- break; - } - - if (CanUseSingleInheritance(RD)) - BuildSIClassTypeInfo(RD); - else - BuildVMIClassTypeInfo(RD); - - break; - } - - case Type::ObjCObject: - case Type::ObjCInterface: - BuildObjCObjectTypeInfo(cast<ObjCObjectType>(Ty)); - break; - - case Type::ObjCObjectPointer: - BuildPointerTypeInfo(cast<ObjCObjectPointerType>(Ty)->getPointeeType()); - break; - - case Type::Pointer: - BuildPointerTypeInfo(cast<PointerType>(Ty)->getPointeeType()); - break; - - case Type::MemberPointer: - BuildPointerToMemberTypeInfo(cast<MemberPointerType>(Ty)); - break; - - case Type::Atomic: - // No fields, at least for the moment. - break; - } - - llvm::Constant *Init = llvm::ConstantStruct::getAnon(Fields); - - llvm::GlobalVariable *GV = - new llvm::GlobalVariable(CGM.getModule(), Init->getType(), - /*Constant=*/true, Linkage, Init, Name); - - // If there's already an old global variable, replace it with the new one. - if (OldGV) { - GV->takeName(OldGV); - llvm::Constant *NewPtr = - llvm::ConstantExpr::getBitCast(GV, OldGV->getType()); - OldGV->replaceAllUsesWith(NewPtr); - OldGV->eraseFromParent(); - } - - // GCC only relies on the uniqueness of the type names, not the - // type_infos themselves, so we can emit these as hidden symbols. - // But don't do this if we're worried about strict visibility - // compatibility. - if (const RecordType *RT = dyn_cast<RecordType>(Ty)) { - const CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl()); - - CGM.setTypeVisibility(GV, RD, CodeGenModule::TVK_ForRTTI); - CGM.setTypeVisibility(TypeName, RD, CodeGenModule::TVK_ForRTTIName); - } else { - Visibility TypeInfoVisibility = DefaultVisibility; - if (CGM.getCodeGenOpts().HiddenWeakVTables && - Linkage == llvm::GlobalValue::LinkOnceODRLinkage) - TypeInfoVisibility = HiddenVisibility; - - // The type name should have the same visibility as the type itself. - Visibility ExplicitVisibility = Ty->getVisibility(); - TypeName->setVisibility(CodeGenModule:: - GetLLVMVisibility(ExplicitVisibility)); - - TypeInfoVisibility = minVisibility(TypeInfoVisibility, Ty->getVisibility()); - GV->setVisibility(CodeGenModule::GetLLVMVisibility(TypeInfoVisibility)); - } - - GV->setUnnamedAddr(true); - - return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy); -} - -/// ComputeQualifierFlags - Compute the pointer type info flags from the -/// given qualifier. -static unsigned ComputeQualifierFlags(Qualifiers Quals) { - unsigned Flags = 0; - - if (Quals.hasConst()) - Flags |= RTTIBuilder::PTI_Const; - if (Quals.hasVolatile()) - Flags |= RTTIBuilder::PTI_Volatile; - if (Quals.hasRestrict()) - Flags |= RTTIBuilder::PTI_Restrict; - - return Flags; -} - -/// BuildObjCObjectTypeInfo - Build the appropriate kind of type_info -/// for the given Objective-C object type. -void RTTIBuilder::BuildObjCObjectTypeInfo(const ObjCObjectType *OT) { - // Drop qualifiers. - const Type *T = OT->getBaseType().getTypePtr(); - assert(isa<BuiltinType>(T) || isa<ObjCInterfaceType>(T)); - - // The builtin types are abi::__class_type_infos and don't require - // extra fields. - if (isa<BuiltinType>(T)) return; - - ObjCInterfaceDecl *Class = cast<ObjCInterfaceType>(T)->getDecl(); - ObjCInterfaceDecl *Super = Class->getSuperClass(); - - // Root classes are also __class_type_info. - if (!Super) return; - - QualType SuperTy = CGM.getContext().getObjCInterfaceType(Super); - - // Everything else is single inheritance. 
- llvm::Constant *BaseTypeInfo = RTTIBuilder(CGM).BuildTypeInfo(SuperTy); - Fields.push_back(BaseTypeInfo); -} - -/// BuildSIClassTypeInfo - Build an abi::__si_class_type_info, used for single -/// inheritance, according to the Itanium C++ ABI, 2.95p6b. -void RTTIBuilder::BuildSIClassTypeInfo(const CXXRecordDecl *RD) { - // Itanium C++ ABI 2.9.5p6b: - // It adds to abi::__class_type_info a single member pointing to the - // type_info structure for the base type, - llvm::Constant *BaseTypeInfo = - RTTIBuilder(CGM).BuildTypeInfo(RD->bases_begin()->getType()); - Fields.push_back(BaseTypeInfo); -} - -namespace { - /// SeenBases - Contains virtual and non-virtual bases seen when traversing - /// a class hierarchy. - struct SeenBases { - llvm::SmallPtrSet<const CXXRecordDecl *, 16> NonVirtualBases; - llvm::SmallPtrSet<const CXXRecordDecl *, 16> VirtualBases; - }; -} - -/// ComputeVMIClassTypeInfoFlags - Compute the value of the flags member in -/// abi::__vmi_class_type_info. -/// -static unsigned ComputeVMIClassTypeInfoFlags(const CXXBaseSpecifier *Base, - SeenBases &Bases) { - - unsigned Flags = 0; - - const CXXRecordDecl *BaseDecl = - cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl()); - - if (Base->isVirtual()) { - // Mark the virtual base as seen. - if (!Bases.VirtualBases.insert(BaseDecl)) { - // If this virtual base has been seen before, then the class is diamond - // shaped. - Flags |= RTTIBuilder::VMI_DiamondShaped; - } else { - if (Bases.NonVirtualBases.count(BaseDecl)) - Flags |= RTTIBuilder::VMI_NonDiamondRepeat; - } - } else { - // Mark the non-virtual base as seen. - if (!Bases.NonVirtualBases.insert(BaseDecl)) { - // If this non-virtual base has been seen before, then the class has non- - // diamond shaped repeated inheritance. - Flags |= RTTIBuilder::VMI_NonDiamondRepeat; - } else { - if (Bases.VirtualBases.count(BaseDecl)) - Flags |= RTTIBuilder::VMI_NonDiamondRepeat; - } - } - - // Walk all bases. - for (CXXRecordDecl::base_class_const_iterator I = BaseDecl->bases_begin(), - E = BaseDecl->bases_end(); I != E; ++I) - Flags |= ComputeVMIClassTypeInfoFlags(I, Bases); - - return Flags; -} - -static unsigned ComputeVMIClassTypeInfoFlags(const CXXRecordDecl *RD) { - unsigned Flags = 0; - SeenBases Bases; - - // Walk all bases. - for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(), - E = RD->bases_end(); I != E; ++I) - Flags |= ComputeVMIClassTypeInfoFlags(I, Bases); - - return Flags; -} - -/// BuildVMIClassTypeInfo - Build an abi::__vmi_class_type_info, used for -/// classes with bases that do not satisfy the abi::__si_class_type_info -/// constraints, according ti the Itanium C++ ABI, 2.9.5p5c. -void RTTIBuilder::BuildVMIClassTypeInfo(const CXXRecordDecl *RD) { - llvm::Type *UnsignedIntLTy = - CGM.getTypes().ConvertType(CGM.getContext().UnsignedIntTy); - - // Itanium C++ ABI 2.9.5p6c: - // __flags is a word with flags describing details about the class - // structure, which may be referenced by using the __flags_masks - // enumeration. These flags refer to both direct and indirect bases. - unsigned Flags = ComputeVMIClassTypeInfoFlags(RD); - Fields.push_back(llvm::ConstantInt::get(UnsignedIntLTy, Flags)); - - // Itanium C++ ABI 2.9.5p6c: - // __base_count is a word with the number of direct proper base class - // descriptions that follow. 
- Fields.push_back(llvm::ConstantInt::get(UnsignedIntLTy, RD->getNumBases())); - - if (!RD->getNumBases()) - return; - - llvm::Type *LongLTy = - CGM.getTypes().ConvertType(CGM.getContext().LongTy); - - // Now add the base class descriptions. - - // Itanium C++ ABI 2.9.5p6c: - // __base_info[] is an array of base class descriptions -- one for every - // direct proper base. Each description is of the type: - // - // struct abi::__base_class_type_info { - // public: - // const __class_type_info *__base_type; - // long __offset_flags; - // - // enum __offset_flags_masks { - // __virtual_mask = 0x1, - // __public_mask = 0x2, - // __offset_shift = 8 - // }; - // }; - for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(), - E = RD->bases_end(); I != E; ++I) { - const CXXBaseSpecifier *Base = I; - - // The __base_type member points to the RTTI for the base type. - Fields.push_back(RTTIBuilder(CGM).BuildTypeInfo(Base->getType())); - - const CXXRecordDecl *BaseDecl = - cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl()); - - int64_t OffsetFlags = 0; - - // All but the lower 8 bits of __offset_flags are a signed offset. - // For a non-virtual base, this is the offset in the object of the base - // subobject. For a virtual base, this is the offset in the virtual table of - // the virtual base offset for the virtual base referenced (negative). - CharUnits Offset; - if (Base->isVirtual()) - Offset = - CGM.getItaniumVTableContext().getVirtualBaseOffsetOffset(RD, BaseDecl); - else { - const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD); - Offset = Layout.getBaseClassOffset(BaseDecl); - }; - - OffsetFlags = uint64_t(Offset.getQuantity()) << 8; - - // The low-order byte of __offset_flags contains flags, as given by the - // masks from the enumeration __offset_flags_masks. - if (Base->isVirtual()) - OffsetFlags |= BCTI_Virtual; - if (Base->getAccessSpecifier() == AS_public) - OffsetFlags |= BCTI_Public; - - Fields.push_back(llvm::ConstantInt::get(LongLTy, OffsetFlags)); - } -} - -/// BuildPointerTypeInfo - Build an abi::__pointer_type_info struct, -/// used for pointer types. -void RTTIBuilder::BuildPointerTypeInfo(QualType PointeeTy) { - Qualifiers Quals; - QualType UnqualifiedPointeeTy = - CGM.getContext().getUnqualifiedArrayType(PointeeTy, Quals); - - // Itanium C++ ABI 2.9.5p7: - // __flags is a flag word describing the cv-qualification and other - // attributes of the type pointed to - unsigned Flags = ComputeQualifierFlags(Quals); - - // Itanium C++ ABI 2.9.5p7: - // When the abi::__pbase_type_info is for a direct or indirect pointer to an - // incomplete class type, the incomplete target type flag is set. - if (ContainsIncompleteClassType(UnqualifiedPointeeTy)) - Flags |= PTI_Incomplete; - - llvm::Type *UnsignedIntLTy = - CGM.getTypes().ConvertType(CGM.getContext().UnsignedIntTy); - Fields.push_back(llvm::ConstantInt::get(UnsignedIntLTy, Flags)); - - // Itanium C++ ABI 2.9.5p7: - // __pointee is a pointer to the std::type_info derivation for the - // unqualified type being pointed to. - llvm::Constant *PointeeTypeInfo = - RTTIBuilder(CGM).BuildTypeInfo(UnqualifiedPointeeTy); - Fields.push_back(PointeeTypeInfo); -} - -/// BuildPointerToMemberTypeInfo - Build an abi::__pointer_to_member_type_info -/// struct, used for member pointer types. 
-void RTTIBuilder::BuildPointerToMemberTypeInfo(const MemberPointerType *Ty) { - QualType PointeeTy = Ty->getPointeeType(); - - Qualifiers Quals; - QualType UnqualifiedPointeeTy = - CGM.getContext().getUnqualifiedArrayType(PointeeTy, Quals); - - // Itanium C++ ABI 2.9.5p7: - // __flags is a flag word describing the cv-qualification and other - // attributes of the type pointed to. - unsigned Flags = ComputeQualifierFlags(Quals); - - const RecordType *ClassType = cast<RecordType>(Ty->getClass()); - - // Itanium C++ ABI 2.9.5p7: - // When the abi::__pbase_type_info is for a direct or indirect pointer to an - // incomplete class type, the incomplete target type flag is set. - if (ContainsIncompleteClassType(UnqualifiedPointeeTy)) - Flags |= PTI_Incomplete; - - if (IsIncompleteClassType(ClassType)) - Flags |= PTI_ContainingClassIncomplete; - - llvm::Type *UnsignedIntLTy = - CGM.getTypes().ConvertType(CGM.getContext().UnsignedIntTy); - Fields.push_back(llvm::ConstantInt::get(UnsignedIntLTy, Flags)); - - // Itanium C++ ABI 2.9.5p7: - // __pointee is a pointer to the std::type_info derivation for the - // unqualified type being pointed to. - llvm::Constant *PointeeTypeInfo = - RTTIBuilder(CGM).BuildTypeInfo(UnqualifiedPointeeTy); - Fields.push_back(PointeeTypeInfo); - - // Itanium C++ ABI 2.9.5p9: - // __context is a pointer to an abi::__class_type_info corresponding to the - // class type containing the member pointed to - // (e.g., the "A" in "int A::*"). - Fields.push_back(RTTIBuilder(CGM).BuildTypeInfo(QualType(ClassType, 0))); -} - -llvm::Constant *CodeGenModule::GetAddrOfRTTIDescriptor(QualType Ty, - bool ForEH) { - // Return a bogus pointer if RTTI is disabled, unless it's for EH. - // FIXME: should we even be calling this method if RTTI is disabled - // and it's not for EH? - if (!ForEH && !getLangOpts().RTTI) - return llvm::Constant::getNullValue(Int8PtrTy); - - if (ForEH && Ty->isObjCObjectPointerType() && - LangOpts.ObjCRuntime.isGNUFamily()) - return ObjCRuntime->GetEHType(Ty); - - return RTTIBuilder(*this).BuildTypeInfo(Ty); -} - -void CodeGenModule::EmitFundamentalRTTIDescriptor(QualType Type) { - QualType PointerType = Context.getPointerType(Type); - QualType PointerTypeConst = Context.getPointerType(Type.withConst()); - RTTIBuilder(*this).BuildTypeInfo(Type, true); - RTTIBuilder(*this).BuildTypeInfo(PointerType, true); - RTTIBuilder(*this).BuildTypeInfo(PointerTypeConst, true); -} - -void CodeGenModule::EmitFundamentalRTTIDescriptors() { - QualType FundamentalTypes[] = { Context.VoidTy, Context.NullPtrTy, - Context.BoolTy, Context.WCharTy, - Context.CharTy, Context.UnsignedCharTy, - Context.SignedCharTy, Context.ShortTy, - Context.UnsignedShortTy, Context.IntTy, - Context.UnsignedIntTy, Context.LongTy, - Context.UnsignedLongTy, Context.LongLongTy, - Context.UnsignedLongLongTy, Context.FloatTy, - Context.DoubleTy, Context.LongDoubleTy, - Context.Char16Ty, Context.Char32Ty }; - for (unsigned i = 0; i < llvm::array_lengthof(FundamentalTypes); ++i) - EmitFundamentalRTTIDescriptor(FundamentalTypes[i]); -} diff --git a/lib/CodeGen/CGRecordLayout.h b/lib/CodeGen/CGRecordLayout.h index b29fc987a120..b45fee56ea09 100644 --- a/lib/CodeGen/CGRecordLayout.h +++ b/lib/CodeGen/CGRecordLayout.h @@ -130,7 +130,7 @@ private: llvm::DenseMap<const FieldDecl *, CGBitFieldInfo> BitFields; // FIXME: Maybe we could use a CXXBaseSpecifier as the key and use a single - // map for both virtual and non virtual bases. + // map for both virtual and non-virtual bases. 
llvm::DenseMap<const CXXRecordDecl *, unsigned> NonVirtualBases; /// Map from virtual bases to their field index in the complete object. @@ -183,6 +183,7 @@ public: /// \brief Return llvm::StructType element number that corresponds to the /// field FD. unsigned getLLVMFieldNo(const FieldDecl *FD) const { + FD = FD->getCanonicalDecl(); assert(FieldInfo.count(FD) && "Invalid field for record!"); return FieldInfo.lookup(FD); } @@ -201,7 +202,8 @@ public: /// \brief Return the BitFieldInfo that corresponds to the field FD. const CGBitFieldInfo &getBitFieldInfo(const FieldDecl *FD) const { - assert(FD->isBitField() && "Invalid call for non bit-field decl!"); + FD = FD->getCanonicalDecl(); + assert(FD->isBitField() && "Invalid call for non-bit-field decl!"); llvm::DenseMap<const FieldDecl *, CGBitFieldInfo>::const_iterator it = BitFields.find(FD); assert(it != BitFields.end() && "Unable to find bitfield info"); diff --git a/lib/CodeGen/CGRecordLayoutBuilder.cpp b/lib/CodeGen/CGRecordLayoutBuilder.cpp index ab92563b21f3..a10d8e791b0f 100644 --- a/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -25,946 +25,647 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace clang; using namespace CodeGen; namespace { - -class CGRecordLayoutBuilder { -public: - /// FieldTypes - Holds the LLVM types that the struct is created from. - /// +/// The CGRecordLowering is responsible for lowering an ASTRecordLayout to an +/// llvm::Type. Some of the lowering is straightforward, some is not. Here we +/// detail some of the complexities and weirdnesses here. +/// * LLVM does not have unions - Unions can, in theory be represented by any +/// llvm::Type with correct size. We choose a field via a specific heuristic +/// and add padding if necessary. +/// * LLVM does not have bitfields - Bitfields are collected into contiguous +/// runs and allocated as a single storage type for the run. ASTRecordLayout +/// contains enough information to determine where the runs break. Microsoft +/// and Itanium follow different rules and use different codepaths. +/// * It is desired that, when possible, bitfields use the appropriate iN type +/// when lowered to llvm types. For example unsigned x : 24 gets lowered to +/// i24. This isn't always possible because i24 has storage size of 32 bit +/// and if it is possible to use that extra byte of padding we must use +/// [i8 x 3] instead of i24. The function clipTailPadding does this. +/// C++ examples that require clipping: +/// struct { int a : 24; char b; }; // a must be clipped, b goes at offset 3 +/// struct A { int a : 24; }; // a must be clipped because a struct like B +// could exist: struct B : A { char b; }; // b goes at offset 3 +/// * Clang ignores 0 sized bitfields and 0 sized bases but *not* zero sized +/// fields. The existing asserts suggest that LLVM assumes that *every* field +/// has an underlying storage type. Therefore empty structures containing +/// zero sized subobjects such as empty records or zero sized arrays still get +/// a zero sized (empty struct) storage type. +/// * Clang reads the complete type rather than the base type when generating +/// code to access fields. Bitfields in tail position with tail padding may +/// be clipped in the base class but not the complete class (we may discover +/// that the tail padding is not used in the complete class.) 
However, +/// because LLVM reads from the complete type it can generate incorrect code +/// if we do not clip the tail padding off of the bitfield in the complete +/// layout. This introduces a somewhat awkward extra unnecessary clip stage. +/// The location of the clip is stored internally as a sentinel of type +/// SCISSOR. If LLVM were updated to read base types (which it probably +/// should because locations of things such as VBases are bogus in the llvm +/// type anyway) then we could eliminate the SCISSOR. +/// * Itanium allows nearly empty primary virtual bases. These bases don't get +/// their own storage because they're laid out as part of another base +/// or at the beginning of the structure. Determining if a VBase actually +/// gets storage awkwardly involves a walk of all bases. +/// * VFPtrs and VBPtrs do *not* make a record NotZeroInitializable. +struct CGRecordLowering { + // MemberInfo is a helper structure that contains information about a record + // member. In addition to the standard member types, there exists a + // sentinel member type that ensures correct rounding. + struct MemberInfo { + CharUnits Offset; + enum InfoKind { VFPtr, VBPtr, Field, Base, VBase, Scissor } Kind; + llvm::Type *Data; + union { + const FieldDecl *FD; + const CXXRecordDecl *RD; + }; + MemberInfo(CharUnits Offset, InfoKind Kind, llvm::Type *Data, + const FieldDecl *FD = nullptr) + : Offset(Offset), Kind(Kind), Data(Data), FD(FD) {} + MemberInfo(CharUnits Offset, InfoKind Kind, llvm::Type *Data, + const CXXRecordDecl *RD) + : Offset(Offset), Kind(Kind), Data(Data), RD(RD) {} + // MemberInfos are sorted so we define a < operator. + bool operator <(const MemberInfo& a) const { return Offset < a.Offset; } + }; + // The constructor. + CGRecordLowering(CodeGenTypes &Types, const RecordDecl *D); + // Short helper routines. + /// \brief Constructs a MemberInfo instance from an offset and llvm::Type *. + MemberInfo StorageInfo(CharUnits Offset, llvm::Type *Data) { + return MemberInfo(Offset, MemberInfo::Field, Data); + } + bool useMSABI() { + return Context.getTargetInfo().getCXXABI().isMicrosoft() || + D->isMsStruct(Context); + } + /// \brief Wraps llvm::Type::getIntNTy with some implicit arguments. + llvm::Type *getIntNType(uint64_t NumBits) { + return llvm::Type::getIntNTy(Types.getLLVMContext(), + (unsigned)llvm::RoundUpToAlignment(NumBits, 8)); + } + /// \brief Gets an llvm type of size NumBytes and alignment 1. + llvm::Type *getByteArrayType(CharUnits NumBytes) { + assert(!NumBytes.isZero() && "Empty byte arrays aren't allowed."); + llvm::Type *Type = llvm::Type::getInt8Ty(Types.getLLVMContext()); + return NumBytes == CharUnits::One() ? Type : + (llvm::Type *)llvm::ArrayType::get(Type, NumBytes.getQuantity()); + } + /// \brief Gets the storage type for a field decl and handles storage + /// for Itanium bitfields that are smaller than their declared type. + llvm::Type *getStorageType(const FieldDecl *FD) { + llvm::Type *Type = Types.ConvertTypeForMem(FD->getType()); + return useMSABI() || !FD->isBitField() ? Type : + getIntNType(std::min(FD->getBitWidthValue(Context), + (unsigned)Context.toBits(getSize(Type)))); + } + /// \brief Gets the llvm BaseSubobject type from a CXXRecordDecl.
+ llvm::Type *getStorageType(const CXXRecordDecl *RD) { + return Types.getCGRecordLayout(RD).getBaseSubobjectLLVMType(); + } + CharUnits bitsToCharUnits(uint64_t BitOffset) { + return Context.toCharUnitsFromBits(BitOffset); + } + CharUnits getSize(llvm::Type *Type) { + return CharUnits::fromQuantity(DataLayout.getTypeAllocSize(Type)); + } + CharUnits getAlignment(llvm::Type *Type) { + return CharUnits::fromQuantity(DataLayout.getABITypeAlignment(Type)); + } + bool isZeroInitializable(const FieldDecl *FD) { + const Type *Type = FD->getType()->getBaseElementTypeUnsafe(); + if (const MemberPointerType *MPT = Type->getAs<MemberPointerType>()) + return Types.getCXXABI().isZeroInitializable(MPT); + if (const RecordType *RT = Type->getAs<RecordType>()) + return isZeroInitializable(RT->getDecl()); + return true; + } + bool isZeroInitializable(const RecordDecl *RD) { + return Types.getCGRecordLayout(RD).isZeroInitializable(); + } + void appendPaddingBytes(CharUnits Size) { + if (!Size.isZero()) + FieldTypes.push_back(getByteArrayType(Size)); + } + uint64_t getFieldBitOffset(const FieldDecl *FD) { + return Layout.getFieldOffset(FD->getFieldIndex()); + } + // Layout routines. + void setBitFieldInfo(const FieldDecl *FD, CharUnits StartOffset, + llvm::Type *StorageType); + /// \brief Lowers an ASTRecordLayout to a llvm type. + void lower(bool NonVirtualBaseType); + void lowerUnion(); + void accumulateFields(); + void accumulateBitFields(RecordDecl::field_iterator Field, + RecordDecl::field_iterator FieldEnd); + void accumulateBases(); + void accumulateVPtrs(); + void accumulateVBases(); + /// \brief Recursively searches all of the bases to find out if a vbase is + /// not the primary vbase of some base class. + bool hasOwnStorage(const CXXRecordDecl *Decl, const CXXRecordDecl *Query); + void calculateZeroInit(); + /// \brief Lowers bitfield storage types to I8 arrays for bitfields with tail + /// padding that is or can potentially be used. + void clipTailPadding(); + /// \brief Determines if we need a packed llvm struct. + void determinePacked(); + /// \brief Inserts padding everwhere it's needed. + void insertPadding(); + /// \brief Fills out the structures that are ultimately consumed. + void fillOutputFields(); + // Input memoization fields. + CodeGenTypes &Types; + const ASTContext &Context; + const RecordDecl *D; + const CXXRecordDecl *RD; + const ASTRecordLayout &Layout; + const llvm::DataLayout &DataLayout; + // Helpful intermediate data-structures. + std::vector<MemberInfo> Members; + // Output fields, consumed by CodeGenTypes::ComputeRecordLayout. SmallVector<llvm::Type *, 16> FieldTypes; - - /// BaseSubobjectType - Holds the LLVM type for the non-virtual part - /// of the struct. For example, consider: - /// - /// struct A { int i; }; - /// struct B { void *v; }; - /// struct C : virtual A, B { }; - /// - /// The LLVM type of C will be - /// %struct.C = type { i32 (...)**, %struct.A, i32, %struct.B } - /// - /// And the LLVM type of the non-virtual base struct will be - /// %struct.C.base = type { i32 (...)**, %struct.A, i32 } - /// - /// This only gets initialized if the base subobject type is - /// different from the complete-object type. - llvm::StructType *BaseSubobjectType; - - /// FieldInfo - Holds a field and its corresponding LLVM field number. llvm::DenseMap<const FieldDecl *, unsigned> Fields; - - /// BitFieldInfo - Holds location and size information about a bit field. 
llvm::DenseMap<const FieldDecl *, CGBitFieldInfo> BitFields; - llvm::DenseMap<const CXXRecordDecl *, unsigned> NonVirtualBases; llvm::DenseMap<const CXXRecordDecl *, unsigned> VirtualBases; - - /// IndirectPrimaryBases - Virtual base classes, direct or indirect, that are - /// primary base classes for some other direct or indirect base class. - CXXIndirectPrimaryBaseSet IndirectPrimaryBases; - - /// LaidOutVirtualBases - A set of all laid out virtual bases, used to avoid - /// avoid laying out virtual bases more than once. - llvm::SmallPtrSet<const CXXRecordDecl *, 4> LaidOutVirtualBases; - - /// IsZeroInitializable - Whether this struct can be C++ - /// zero-initialized with an LLVM zeroinitializer. - bool IsZeroInitializable; - bool IsZeroInitializableAsBase; - - /// Packed - Whether the resulting LLVM struct will be packed or not. - bool Packed; - + bool IsZeroInitializable : 1; + bool IsZeroInitializableAsBase : 1; + bool Packed : 1; private: - CodeGenTypes &Types; - - /// LastLaidOutBaseInfo - Contains the offset and non-virtual size of the - /// last base laid out. Used so that we can replace the last laid out base - /// type with an i8 array if needed. - struct LastLaidOutBaseInfo { - CharUnits Offset; - CharUnits NonVirtualSize; - - bool isValid() const { return !NonVirtualSize.isZero(); } - void invalidate() { NonVirtualSize = CharUnits::Zero(); } - - } LastLaidOutBase; - - /// Alignment - Contains the alignment of the RecordDecl. - CharUnits Alignment; - - /// NextFieldOffset - Holds the next field offset. - CharUnits NextFieldOffset; - - /// LayoutUnionField - Will layout a field in an union and return the type - /// that the field will have. - llvm::Type *LayoutUnionField(const FieldDecl *Field, - const ASTRecordLayout &Layout); - - /// LayoutUnion - Will layout a union RecordDecl. - void LayoutUnion(const RecordDecl *D); - - /// Lay out a sequence of contiguous bitfields. - bool LayoutBitfields(const ASTRecordLayout &Layout, - unsigned &FirstFieldNo, - RecordDecl::field_iterator &FI, - RecordDecl::field_iterator FE); - - /// LayoutFields - try to layout all fields in the record decl. - /// Returns false if the operation failed because the struct is not packed. - bool LayoutFields(const RecordDecl *D); - - /// Layout a single base, virtual or non-virtual - bool LayoutBase(const CXXRecordDecl *base, - const CGRecordLayout &baseLayout, - CharUnits baseOffset); - - /// LayoutVirtualBase - layout a single virtual base. - bool LayoutVirtualBase(const CXXRecordDecl *base, - CharUnits baseOffset); - - /// LayoutVirtualBases - layout the virtual bases of a record decl. - bool LayoutVirtualBases(const CXXRecordDecl *RD, - const ASTRecordLayout &Layout); - - /// MSLayoutVirtualBases - layout the virtual bases of a record decl, - /// like MSVC. - bool MSLayoutVirtualBases(const CXXRecordDecl *RD, - const ASTRecordLayout &Layout); - - /// LayoutNonVirtualBase - layout a single non-virtual base. - bool LayoutNonVirtualBase(const CXXRecordDecl *base, - CharUnits baseOffset); - - /// LayoutNonVirtualBases - layout the virtual bases of a record decl. - bool LayoutNonVirtualBases(const CXXRecordDecl *RD, - const ASTRecordLayout &Layout); - - /// ComputeNonVirtualBaseType - Compute the non-virtual base field types. - bool ComputeNonVirtualBaseType(const CXXRecordDecl *RD); - - /// LayoutField - layout a single field. Returns false if the operation failed - /// because the current struct is not packed. 
- bool LayoutField(const FieldDecl *D, uint64_t FieldOffset); - - /// LayoutBitField - layout a single bit field. - void LayoutBitField(const FieldDecl *D, uint64_t FieldOffset); - - /// AppendField - Appends a field with the given offset and type. - void AppendField(CharUnits fieldOffset, llvm::Type *FieldTy); - - /// AppendPadding - Appends enough padding bytes so that the total - /// struct size is a multiple of the field alignment. - void AppendPadding(CharUnits fieldOffset, CharUnits fieldAlignment); - - /// ResizeLastBaseFieldIfNecessary - Fields and bases can be laid out in the - /// tail padding of a previous base. If this happens, the type of the previous - /// base needs to be changed to an array of i8. Returns true if the last - /// laid out base was resized. - bool ResizeLastBaseFieldIfNecessary(CharUnits offset); - - /// getByteArrayType - Returns a byte array type with the given number of - /// elements. - llvm::Type *getByteArrayType(CharUnits NumBytes); - - /// AppendBytes - Append a given number of bytes to the record. - void AppendBytes(CharUnits numBytes); - - /// AppendTailPadding - Append enough tail padding so that the type will have - /// the passed size. - void AppendTailPadding(CharUnits RecordSize); - - CharUnits getTypeAlignment(llvm::Type *Ty) const; - - /// getAlignmentAsLLVMStruct - Returns the maximum alignment of all the - /// LLVM element types. - CharUnits getAlignmentAsLLVMStruct() const; - - /// CheckZeroInitializable - Check if the given type contains a pointer - /// to data member. - void CheckZeroInitializable(QualType T); - -public: - CGRecordLayoutBuilder(CodeGenTypes &Types) - : BaseSubobjectType(0), - IsZeroInitializable(true), IsZeroInitializableAsBase(true), - Packed(false), Types(Types) { } - - /// Layout - Will layout a RecordDecl. - void Layout(const RecordDecl *D); + CGRecordLowering(const CGRecordLowering &) LLVM_DELETED_FUNCTION; + void operator =(const CGRecordLowering &) LLVM_DELETED_FUNCTION; }; - -} - -void CGRecordLayoutBuilder::Layout(const RecordDecl *D) { - const ASTRecordLayout &Layout = Types.getContext().getASTRecordLayout(D); - Alignment = Layout.getAlignment(); - Packed = D->hasAttr<PackedAttr>() || Layout.getSize() % Alignment != 0; - - if (D->isUnion()) { - LayoutUnion(D); - return; - } - - if (LayoutFields(D)) - return; - - // We weren't able to layout the struct. Try again with a packed struct - Packed = true; - LastLaidOutBase.invalidate(); - NextFieldOffset = CharUnits::Zero(); - FieldTypes.clear(); - Fields.clear(); - BitFields.clear(); - NonVirtualBases.clear(); - VirtualBases.clear(); - - LayoutFields(D); -} - -CGBitFieldInfo CGBitFieldInfo::MakeInfo(CodeGenTypes &Types, - const FieldDecl *FD, - uint64_t Offset, uint64_t Size, - uint64_t StorageSize, - uint64_t StorageAlignment) { - llvm::Type *Ty = Types.ConvertTypeForMem(FD->getType()); - CharUnits TypeSizeInBytes = - CharUnits::fromQuantity(Types.getDataLayout().getTypeAllocSize(Ty)); - uint64_t TypeSizeInBits = Types.getContext().toBits(TypeSizeInBytes); - - bool IsSigned = FD->getType()->isSignedIntegerOrEnumerationType(); - - if (Size > TypeSizeInBits) { - // We have a wide bit-field. 
The extra bits are only used for padding, so - // if we have a bitfield of type T, with size N: - // - // T t : N; - // - // We can just assume that it's: - // - // T t : sizeof(T); - // - Size = TypeSizeInBits; - } - +} // namespace { + +CGRecordLowering::CGRecordLowering(CodeGenTypes &Types, const RecordDecl *D) + : Types(Types), Context(Types.getContext()), D(D), + RD(dyn_cast<CXXRecordDecl>(D)), + Layout(Types.getContext().getASTRecordLayout(D)), + DataLayout(Types.getDataLayout()), IsZeroInitializable(true), + IsZeroInitializableAsBase(true), Packed(false) {} + +void CGRecordLowering::setBitFieldInfo( + const FieldDecl *FD, CharUnits StartOffset, llvm::Type *StorageType) { + CGBitFieldInfo &Info = BitFields[FD->getCanonicalDecl()]; + Info.IsSigned = FD->getType()->isSignedIntegerOrEnumerationType(); + Info.Offset = (unsigned)(getFieldBitOffset(FD) - Context.toBits(StartOffset)); + Info.Size = FD->getBitWidthValue(Context); + Info.StorageSize = (unsigned)DataLayout.getTypeAllocSizeInBits(StorageType); + // Here we calculate the actual storage alignment of the bits. E.g if we've + // got an alignment >= 2 and the bitfield starts at offset 6 we've got an + // alignment of 2. + Info.StorageAlignment = + Layout.getAlignment().alignmentAtOffset(StartOffset).getQuantity(); + if (Info.Size > Info.StorageSize) + Info.Size = Info.StorageSize; // Reverse the bit offsets for big endian machines. Because we represent // a bitfield as a single large integer load, we can imagine the bits // counting from the most-significant-bit instead of the // least-significant-bit. - if (Types.getDataLayout().isBigEndian()) { - Offset = StorageSize - (Offset + Size); - } - - return CGBitFieldInfo(Offset, Size, IsSigned, StorageSize, StorageAlignment); + if (DataLayout.isBigEndian()) + Info.Offset = Info.StorageSize - (Info.Offset + Info.Size); } -/// \brief Layout the range of bitfields from BFI to BFE as contiguous storage. -bool CGRecordLayoutBuilder::LayoutBitfields(const ASTRecordLayout &Layout, - unsigned &FirstFieldNo, - RecordDecl::field_iterator &FI, - RecordDecl::field_iterator FE) { - assert(FI != FE); - uint64_t FirstFieldOffset = Layout.getFieldOffset(FirstFieldNo); - uint64_t NextFieldOffsetInBits = Types.getContext().toBits(NextFieldOffset); - - unsigned CharAlign = Types.getTarget().getCharAlign(); - assert(FirstFieldOffset % CharAlign == 0 && - "First field offset is misaligned"); - CharUnits FirstFieldOffsetInBytes - = Types.getContext().toCharUnitsFromBits(FirstFieldOffset); - - unsigned StorageAlignment - = llvm::MinAlign(Alignment.getQuantity(), - FirstFieldOffsetInBytes.getQuantity()); - - if (FirstFieldOffset < NextFieldOffsetInBits) { - CharUnits FieldOffsetInCharUnits = - Types.getContext().toCharUnitsFromBits(FirstFieldOffset); - - // Try to resize the last base field. - if (!ResizeLastBaseFieldIfNecessary(FieldOffsetInCharUnits)) - llvm_unreachable("We must be able to resize the last base if we need to " - "pack bits into it."); - - NextFieldOffsetInBits = Types.getContext().toBits(NextFieldOffset); - assert(FirstFieldOffset >= NextFieldOffsetInBits); - } - - // Append padding if necessary. - AppendPadding(Types.getContext().toCharUnitsFromBits(FirstFieldOffset), - CharUnits::One()); - - // Find the last bitfield in a contiguous run of bitfields. 
- RecordDecl::field_iterator BFI = FI; - unsigned LastFieldNo = FirstFieldNo; - uint64_t NextContiguousFieldOffset = FirstFieldOffset; - for (RecordDecl::field_iterator FJ = FI; - (FJ != FE && (*FJ)->isBitField() && - NextContiguousFieldOffset == Layout.getFieldOffset(LastFieldNo) && - (*FJ)->getBitWidthValue(Types.getContext()) != 0); FI = FJ++) { - NextContiguousFieldOffset += (*FJ)->getBitWidthValue(Types.getContext()); - ++LastFieldNo; - - // We must use packed structs for packed fields, and also unnamed bit - // fields since they don't affect the struct alignment. - if (!Packed && ((*FJ)->hasAttr<PackedAttr>() || !(*FJ)->getDeclName())) - return false; - } - RecordDecl::field_iterator BFE = llvm::next(FI); - --LastFieldNo; - assert(LastFieldNo >= FirstFieldNo && "Empty run of contiguous bitfields"); - FieldDecl *LastFD = *FI; - - // Find the last bitfield's offset, add its size, and round it up to the - // character alignment to compute the storage required. - uint64_t LastFieldOffset = Layout.getFieldOffset(LastFieldNo); - uint64_t LastFieldSize = LastFD->getBitWidthValue(Types.getContext()); - uint64_t TotalBits = (LastFieldOffset + LastFieldSize) - FirstFieldOffset; - CharUnits StorageBytes = Types.getContext().toCharUnitsFromBits( - llvm::RoundUpToAlignment(TotalBits, CharAlign)); - uint64_t StorageBits = Types.getContext().toBits(StorageBytes); - - // Grow the storage to encompass any known padding in the layout when doing - // so will make the storage a power-of-two. There are two cases when we can - // do this. The first is when we have a subsequent field and can widen up to - // its offset. The second is when the data size of the AST record layout is - // past the end of the current storage. The latter is true when there is tail - // padding on a struct and no members of a super class can be packed into it. - // - // Note that we widen the storage as much as possible here to express the - // maximum latitude the language provides, and rely on the backend to lower - // these in conjunction with shifts and masks to narrower operations where - // beneficial. - uint64_t EndOffset = Types.getContext().toBits(Layout.getDataSize()); - if (BFE != FE) - // If there are more fields to be laid out, the offset at the end of the - // bitfield is the offset of the next field in the record. - EndOffset = Layout.getFieldOffset(LastFieldNo + 1); - assert(EndOffset >= (FirstFieldOffset + TotalBits) && - "End offset is not past the end of the known storage bits."); - uint64_t SpaceBits = EndOffset - FirstFieldOffset; - uint64_t LongBits = Types.getTarget().getLongWidth(); - uint64_t WidenedBits = (StorageBits / LongBits) * LongBits + - llvm::NextPowerOf2(StorageBits % LongBits - 1); - assert(WidenedBits >= StorageBits && "Widening shrunk the bits!"); - if (WidenedBits <= SpaceBits) { - StorageBits = WidenedBits; - StorageBytes = Types.getContext().toCharUnitsFromBits(StorageBits); - assert(StorageBits == (uint64_t)Types.getContext().toBits(StorageBytes)); - } - - unsigned FieldIndex = FieldTypes.size(); - AppendBytes(StorageBytes); - - // Now walk the bitfields associating them with this field of storage and - // building up the bitfield specific info. 
- unsigned FieldNo = FirstFieldNo; - for (; BFI != BFE; ++BFI, ++FieldNo) { - FieldDecl *FD = *BFI; - uint64_t FieldOffset = Layout.getFieldOffset(FieldNo) - FirstFieldOffset; - uint64_t FieldSize = FD->getBitWidthValue(Types.getContext()); - Fields[FD] = FieldIndex; - BitFields[FD] = CGBitFieldInfo::MakeInfo(Types, FD, FieldOffset, FieldSize, - StorageBits, StorageAlignment); - } - FirstFieldNo = LastFieldNo; - return true; +void CGRecordLowering::lower(bool NVBaseType) { + // The lowering process implemented in this function takes a variety of + // carefully ordered phases. + // 1) Store all members (fields and bases) in a list and sort them by offset. + // 2) Add a 1-byte capstone member at the Size of the structure. + // 3) Clip bitfield storages members if their tail padding is or might be + // used by another field or base. The clipping process uses the capstone + // by treating it as another object that occurs after the record. + // 4) Determine if the llvm-struct requires packing. It's important that this + // phase occur after clipping, because clipping changes the llvm type. + // This phase reads the offset of the capstone when determining packedness + // and updates the alignment of the capstone to be equal of the alignment + // of the record after doing so. + // 5) Insert padding everywhere it is needed. This phase requires 'Packed' to + // have been computed and needs to know the alignment of the record in + // order to understand if explicit tail padding is needed. + // 6) Remove the capstone, we don't need it anymore. + // 7) Determine if this record can be zero-initialized. This phase could have + // been placed anywhere after phase 1. + // 8) Format the complete list of members in a way that can be consumed by + // CodeGenTypes::ComputeRecordLayout. + CharUnits Size = NVBaseType ? Layout.getNonVirtualSize() : Layout.getSize(); + if (D->isUnion()) + return lowerUnion(); + accumulateFields(); + // RD implies C++. + if (RD) { + accumulateVPtrs(); + accumulateBases(); + if (Members.empty()) + return appendPaddingBytes(Size); + if (!NVBaseType) + accumulateVBases(); + } + std::stable_sort(Members.begin(), Members.end()); + Members.push_back(StorageInfo(Size, getIntNType(8))); + clipTailPadding(); + determinePacked(); + insertPadding(); + Members.pop_back(); + calculateZeroInit(); + fillOutputFields(); } -bool CGRecordLayoutBuilder::LayoutField(const FieldDecl *D, - uint64_t fieldOffset) { - // If the field is packed, then we need a packed struct. - if (!Packed && D->hasAttr<PackedAttr>()) - return false; - - assert(!D->isBitField() && "Bitfields should be laid out seperately."); - - CheckZeroInitializable(D->getType()); - - assert(fieldOffset % Types.getTarget().getCharWidth() == 0 - && "field offset is not on a byte boundary!"); - CharUnits fieldOffsetInBytes - = Types.getContext().toCharUnitsFromBits(fieldOffset); - - llvm::Type *Ty = Types.ConvertTypeForMem(D->getType()); - CharUnits typeAlignment = getTypeAlignment(Ty); - - // If the type alignment is larger then the struct alignment, we must use - // a packed struct. 
- if (typeAlignment > Alignment) { - assert(!Packed && "Alignment is wrong even with packed struct!"); - return false; - } - - if (!Packed) { - if (const RecordType *RT = D->getType()->getAs<RecordType>()) { - const RecordDecl *RD = cast<RecordDecl>(RT->getDecl()); - if (const MaxFieldAlignmentAttr *MFAA = - RD->getAttr<MaxFieldAlignmentAttr>()) { - if (MFAA->getAlignment() != Types.getContext().toBits(typeAlignment)) - return false; - } +void CGRecordLowering::lowerUnion() { + CharUnits LayoutSize = Layout.getSize(); + llvm::Type *StorageType = nullptr; + // Compute zero-initializable status. + if (!D->field_empty() && !isZeroInitializable(*D->field_begin())) + IsZeroInitializable = IsZeroInitializableAsBase = false; + // Iterate through the fields setting bitFieldInfo and the Fields array. Also + // locate the "most appropriate" storage type. The heuristic for finding the + // storage type isn't necessary, the first (non-0-length-bitfield) field's + // type would work fine and be simpler but would be differen than what we've + // been doing and cause lit tests to change. + for (const auto *Field : D->fields()) { + if (Field->isBitField()) { + // Skip 0 sized bitfields. + if (Field->getBitWidthValue(Context) == 0) + continue; + llvm::Type *FieldType = getStorageType(Field); + if (LayoutSize < getSize(FieldType)) + FieldType = getByteArrayType(LayoutSize); + setBitFieldInfo(Field, CharUnits::Zero(), FieldType); } - } - - // Round up the field offset to the alignment of the field type. - CharUnits alignedNextFieldOffsetInBytes = - NextFieldOffset.RoundUpToAlignment(typeAlignment); + Fields[Field->getCanonicalDecl()] = 0; + llvm::Type *FieldType = getStorageType(Field); + // Conditionally update our storage type if we've got a new "better" one. + if (!StorageType || + getAlignment(FieldType) > getAlignment(StorageType) || + (getAlignment(FieldType) == getAlignment(StorageType) && + getSize(FieldType) > getSize(StorageType))) + StorageType = FieldType; + } + // If we have no storage type just pad to the appropriate size and return. + if (!StorageType) + return appendPaddingBytes(LayoutSize); + // If our storage size was bigger than our required size (can happen in the + // case of packed bitfields on Itanium) then just use an I8 array. + if (LayoutSize < getSize(StorageType)) + StorageType = getByteArrayType(LayoutSize); + FieldTypes.push_back(StorageType); + appendPaddingBytes(LayoutSize - getSize(StorageType)); + // Set packed if we need it. + if (LayoutSize % getAlignment(StorageType)) + Packed = true; +} - if (fieldOffsetInBytes < alignedNextFieldOffsetInBytes) { - // Try to resize the last base field. - if (ResizeLastBaseFieldIfNecessary(fieldOffsetInBytes)) { - alignedNextFieldOffsetInBytes = - NextFieldOffset.RoundUpToAlignment(typeAlignment); +void CGRecordLowering::accumulateFields() { + for (RecordDecl::field_iterator Field = D->field_begin(), + FieldEnd = D->field_end(); + Field != FieldEnd;) + if (Field->isBitField()) { + RecordDecl::field_iterator Start = Field; + // Iterate to gather the list of bitfields. 
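  // Worked example (illustrative, hypothetical struct): given
  //
  //   struct S { int a; int b : 3; int c : 5; char d; };
  //
  // the loop below visits 'a' and 'd' as ordinary members and hands the
  // contiguous run of bitfields 'b' and 'c' to accumulateBitFields() as one
  // half-open iterator range, where they will normally end up sharing a
  // single storage unit.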
+ for (++Field; Field != FieldEnd && Field->isBitField(); ++Field); + accumulateBitFields(Start, Field); + } else { + Members.push_back(MemberInfo( + bitsToCharUnits(getFieldBitOffset(*Field)), MemberInfo::Field, + getStorageType(*Field), *Field)); + ++Field; } - } - - if (fieldOffsetInBytes < alignedNextFieldOffsetInBytes) { - assert(!Packed && "Could not place field even with packed struct!"); - return false; - } - - AppendPadding(fieldOffsetInBytes, typeAlignment); - - // Now append the field. - Fields[D] = FieldTypes.size(); - AppendField(fieldOffsetInBytes, Ty); - - LastLaidOutBase.invalidate(); - return true; } -llvm::Type * -CGRecordLayoutBuilder::LayoutUnionField(const FieldDecl *Field, - const ASTRecordLayout &Layout) { - Fields[Field] = 0; - if (Field->isBitField()) { - uint64_t FieldSize = Field->getBitWidthValue(Types.getContext()); - - // Ignore zero sized bit fields. - if (FieldSize == 0) - return 0; - - unsigned StorageBits = llvm::RoundUpToAlignment( - FieldSize, Types.getTarget().getCharAlign()); - CharUnits NumBytesToAppend - = Types.getContext().toCharUnitsFromBits(StorageBits); - - llvm::Type *FieldTy = llvm::Type::getInt8Ty(Types.getLLVMContext()); - if (NumBytesToAppend > CharUnits::One()) - FieldTy = llvm::ArrayType::get(FieldTy, NumBytesToAppend.getQuantity()); - - // Add the bit field info. - BitFields[Field] = CGBitFieldInfo::MakeInfo(Types, Field, 0, FieldSize, - StorageBits, - Alignment.getQuantity()); - return FieldTy; +void +CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, + RecordDecl::field_iterator FieldEnd) { + // Run stores the first element of the current run of bitfields. FieldEnd is + // used as a special value to note that we don't have a current run. A + // bitfield run is a contiguous collection of bitfields that can be stored in + // the same storage block. Zero-sized bitfields and bitfields that would + // cross an alignment boundary break a run and start a new one. + RecordDecl::field_iterator Run = FieldEnd; + // Tail is the offset of the first bit off the end of the current run. It's + // used to determine if the ASTRecordLayout is treating these two bitfields as + // contiguous. StartBitOffset is offset of the beginning of the Run. + uint64_t StartBitOffset, Tail = 0; + if (useMSABI()) { + for (; Field != FieldEnd; ++Field) { + uint64_t BitOffset = getFieldBitOffset(*Field); + // Zero-width bitfields end runs. + if (Field->getBitWidthValue(Context) == 0) { + Run = FieldEnd; + continue; + } + llvm::Type *Type = Types.ConvertTypeForMem(Field->getType()); + // If we don't have a run yet, or don't live within the previous run's + // allocated storage then we allocate some storage and start a new run. + if (Run == FieldEnd || BitOffset >= Tail) { + Run = Field; + StartBitOffset = BitOffset; + Tail = StartBitOffset + DataLayout.getTypeAllocSizeInBits(Type); + // Add the storage member to the record. This must be added to the + // record before the bitfield members so that it gets laid out before + // the bitfields it contains get laid out. + Members.push_back(StorageInfo(bitsToCharUnits(StartBitOffset), Type)); + } + // Bitfields get the offset of their storage but come afterward and remain + // there after a stable sort. + Members.push_back(MemberInfo(bitsToCharUnits(StartBitOffset), + MemberInfo::Field, nullptr, *Field)); + } + return; } - - // This is a regular union field. 
- return Types.ConvertTypeForMem(Field->getType()); -} - -void CGRecordLayoutBuilder::LayoutUnion(const RecordDecl *D) { - assert(D->isUnion() && "Can't call LayoutUnion on a non-union record!"); - - const ASTRecordLayout &layout = Types.getContext().getASTRecordLayout(D); - - llvm::Type *unionType = 0; - CharUnits unionSize = CharUnits::Zero(); - CharUnits unionAlign = CharUnits::Zero(); - - bool hasOnlyZeroSizedBitFields = true; - bool checkedFirstFieldZeroInit = false; - - unsigned fieldNo = 0; - for (RecordDecl::field_iterator field = D->field_begin(), - fieldEnd = D->field_end(); field != fieldEnd; ++field, ++fieldNo) { - assert(layout.getFieldOffset(fieldNo) == 0 && - "Union field offset did not start at the beginning of record!"); - llvm::Type *fieldType = LayoutUnionField(*field, layout); - - if (!fieldType) + for (;;) { + // Check to see if we need to start a new run. + if (Run == FieldEnd) { + // If we're out of fields, return. + if (Field == FieldEnd) + break; + // Any non-zero-length bitfield can start a new run. + if (Field->getBitWidthValue(Context) != 0) { + Run = Field; + StartBitOffset = getFieldBitOffset(*Field); + Tail = StartBitOffset + Field->getBitWidthValue(Context); + } + ++Field; continue; - - if (field->getDeclName() && !checkedFirstFieldZeroInit) { - CheckZeroInitializable(field->getType()); - checkedFirstFieldZeroInit = true; } - - hasOnlyZeroSizedBitFields = false; - - CharUnits fieldAlign = CharUnits::fromQuantity( - Types.getDataLayout().getABITypeAlignment(fieldType)); - CharUnits fieldSize = CharUnits::fromQuantity( - Types.getDataLayout().getTypeAllocSize(fieldType)); - - if (fieldAlign < unionAlign) + // Add bitfields to the run as long as they qualify. + if (Field != FieldEnd && Field->getBitWidthValue(Context) != 0 && + Tail == getFieldBitOffset(*Field)) { + Tail += Field->getBitWidthValue(Context); + ++Field; continue; - - if (fieldAlign > unionAlign || fieldSize > unionSize) { - unionType = fieldType; - unionAlign = fieldAlign; - unionSize = fieldSize; - } - } - - // Now add our field. - if (unionType) { - AppendField(CharUnits::Zero(), unionType); - - if (getTypeAlignment(unionType) > layout.getAlignment()) { - // We need a packed struct. - Packed = true; - unionAlign = CharUnits::One(); } + // We've hit a break-point in the run and need to emit a storage field. + llvm::Type *Type = getIntNType(Tail - StartBitOffset); + // Add the storage member to the record and set the bitfield info for all of + // the bitfields in the run. Bitfields get the offset of their storage but + // come afterward and remain there after a stable sort. + Members.push_back(StorageInfo(bitsToCharUnits(StartBitOffset), Type)); + for (; Run != Field; ++Run) + Members.push_back(MemberInfo(bitsToCharUnits(StartBitOffset), + MemberInfo::Field, nullptr, *Run)); + Run = FieldEnd; } - if (unionAlign.isZero()) { - (void)hasOnlyZeroSizedBitFields; - assert(hasOnlyZeroSizedBitFields && - "0-align record did not have all zero-sized bit-fields!"); - unionAlign = CharUnits::One(); - } - - // Append tail padding. 
- CharUnits recordSize = layout.getSize(); - if (recordSize > unionSize) - AppendPadding(recordSize, unionAlign); -} - -bool CGRecordLayoutBuilder::LayoutBase(const CXXRecordDecl *base, - const CGRecordLayout &baseLayout, - CharUnits baseOffset) { - ResizeLastBaseFieldIfNecessary(baseOffset); - - AppendPadding(baseOffset, CharUnits::One()); - - const ASTRecordLayout &baseASTLayout - = Types.getContext().getASTRecordLayout(base); - - LastLaidOutBase.Offset = NextFieldOffset; - LastLaidOutBase.NonVirtualSize = baseASTLayout.getNonVirtualSize(); - - llvm::StructType *subobjectType = baseLayout.getBaseSubobjectLLVMType(); - if (getTypeAlignment(subobjectType) > Alignment) - return false; - - AppendField(baseOffset, subobjectType); - return true; } -bool CGRecordLayoutBuilder::LayoutNonVirtualBase(const CXXRecordDecl *base, - CharUnits baseOffset) { - // Ignore empty bases. - if (base->isEmpty()) return true; - - const CGRecordLayout &baseLayout = Types.getCGRecordLayout(base); - if (IsZeroInitializableAsBase) { - assert(IsZeroInitializable && - "class zero-initializable as base but not as complete object"); - - IsZeroInitializable = IsZeroInitializableAsBase = - baseLayout.isZeroInitializableAsBase(); +void CGRecordLowering::accumulateBases() { + // If we've got a primary virtual base, we need to add it with the bases. + if (Layout.isPrimaryBaseVirtual()) { + const CXXRecordDecl *BaseDecl = Layout.getPrimaryBase(); + Members.push_back(MemberInfo(CharUnits::Zero(), MemberInfo::Base, + getStorageType(BaseDecl), BaseDecl)); } - - if (!LayoutBase(base, baseLayout, baseOffset)) - return false; - NonVirtualBases[base] = (FieldTypes.size() - 1); - return true; -} - -bool -CGRecordLayoutBuilder::LayoutVirtualBase(const CXXRecordDecl *base, - CharUnits baseOffset) { - // Ignore empty bases. - if (base->isEmpty()) return true; - - const CGRecordLayout &baseLayout = Types.getCGRecordLayout(base); - if (IsZeroInitializable) - IsZeroInitializable = baseLayout.isZeroInitializableAsBase(); - - if (!LayoutBase(base, baseLayout, baseOffset)) - return false; - VirtualBases[base] = (FieldTypes.size() - 1); - return true; -} - -bool -CGRecordLayoutBuilder::MSLayoutVirtualBases(const CXXRecordDecl *RD, - const ASTRecordLayout &Layout) { - if (!RD->getNumVBases()) - return true; - - // The vbases list is uniqued and ordered by a depth-first - // traversal, which is what we need here. - for (CXXRecordDecl::base_class_const_iterator I = RD->vbases_begin(), - E = RD->vbases_end(); I != E; ++I) { - - const CXXRecordDecl *BaseDecl = - cast<CXXRecordDecl>(I->getType()->castAs<RecordType>()->getDecl()); - - CharUnits vbaseOffset = Layout.getVBaseClassOffset(BaseDecl); - if (!LayoutVirtualBase(BaseDecl, vbaseOffset)) - return false; + // Accumulate the non-virtual bases. + for (const auto &Base : RD->bases()) { + if (Base.isVirtual()) + continue; + const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); + if (!BaseDecl->isEmpty()) + Members.push_back(MemberInfo(Layout.getBaseClassOffset(BaseDecl), + MemberInfo::Base, getStorageType(BaseDecl), BaseDecl)); } - return true; } -/// LayoutVirtualBases - layout the non-virtual bases of a record decl. 
-bool -CGRecordLayoutBuilder::LayoutVirtualBases(const CXXRecordDecl *RD, - const ASTRecordLayout &Layout) { - for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(), - E = RD->bases_end(); I != E; ++I) { - const CXXRecordDecl *BaseDecl = - cast<CXXRecordDecl>(I->getType()->getAs<RecordType>()->getDecl()); +void CGRecordLowering::accumulateVPtrs() { + if (Layout.hasOwnVFPtr()) + Members.push_back(MemberInfo(CharUnits::Zero(), MemberInfo::VFPtr, + llvm::FunctionType::get(getIntNType(32), /*isVarArg=*/true)-> + getPointerTo()->getPointerTo())); + if (Layout.hasOwnVBPtr()) + Members.push_back(MemberInfo(Layout.getVBPtrOffset(), MemberInfo::VBPtr, + llvm::Type::getInt32PtrTy(Types.getLLVMContext()))); +} - // We only want to lay out virtual bases that aren't indirect primary bases - // of some other base. - if (I->isVirtual() && !IndirectPrimaryBases.count(BaseDecl)) { - // Only lay out the base once. - if (!LaidOutVirtualBases.insert(BaseDecl)) +void CGRecordLowering::accumulateVBases() { + CharUnits ScissorOffset = Layout.getNonVirtualSize(); + // In the itanium ABI, it's possible to place a vbase at a dsize that is + // smaller than the nvsize. Here we check to see if such a base is placed + // before the nvsize and set the scissor offset to that, instead of the + // nvsize. + if (!useMSABI()) + for (const auto &Base : RD->vbases()) { + const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); + if (BaseDecl->isEmpty()) continue; - - CharUnits vbaseOffset = Layout.getVBaseClassOffset(BaseDecl); - if (!LayoutVirtualBase(BaseDecl, vbaseOffset)) - return false; + // If the vbase is a primary virtual base of some base, then it doesn't + // get its own storage location but instead lives inside of that base. + if (Context.isNearlyEmpty(BaseDecl) && !hasOwnStorage(RD, BaseDecl)) + continue; + ScissorOffset = std::min(ScissorOffset, + Layout.getVBaseClassOffset(BaseDecl)); } - - if (!BaseDecl->getNumVBases()) { - // This base isn't interesting since it doesn't have any virtual bases. + Members.push_back(MemberInfo(ScissorOffset, MemberInfo::Scissor, nullptr, + RD)); + for (const auto &Base : RD->vbases()) { + const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); + if (BaseDecl->isEmpty()) + continue; + CharUnits Offset = Layout.getVBaseClassOffset(BaseDecl); + // If the vbase is a primary virtual base of some base, then it doesn't + // get its own storage location but instead lives inside of that base. + if (!useMSABI() && Context.isNearlyEmpty(BaseDecl) && + !hasOwnStorage(RD, BaseDecl)) { + Members.push_back(MemberInfo(Offset, MemberInfo::VBase, nullptr, + BaseDecl)); continue; } - - if (!LayoutVirtualBases(BaseDecl, Layout)) - return false; + // If we've got a vtordisp, add it as a storage type. + if (Layout.getVBaseOffsetsMap().find(BaseDecl)->second.hasVtorDisp()) + Members.push_back(StorageInfo(Offset - CharUnits::fromQuantity(4), + getIntNType(32))); + Members.push_back(MemberInfo(Offset, MemberInfo::VBase, + getStorageType(BaseDecl), BaseDecl)); } - return true; } -bool -CGRecordLayoutBuilder::LayoutNonVirtualBases(const CXXRecordDecl *RD, - const ASTRecordLayout &Layout) { - const CXXRecordDecl *PrimaryBase = Layout.getPrimaryBase(); - - // If we have a primary base, lay it out first. 
- if (PrimaryBase) { - if (!Layout.isPrimaryBaseVirtual()) { - if (!LayoutNonVirtualBase(PrimaryBase, CharUnits::Zero())) - return false; - } else { - if (!LayoutVirtualBase(PrimaryBase, CharUnits::Zero())) - return false; - } - - // Otherwise, add a vtable / vf-table if the layout says to do so. - } else if (Layout.hasOwnVFPtr()) { - llvm::Type *FunctionType = - llvm::FunctionType::get(llvm::Type::getInt32Ty(Types.getLLVMContext()), - /*isVarArg=*/true); - llvm::Type *VTableTy = FunctionType->getPointerTo(); - - if (getTypeAlignment(VTableTy) > Alignment) { - // FIXME: Should we allow this to happen in Sema? - assert(!Packed && "Alignment is wrong even with packed struct!"); - return false; - } - - assert(NextFieldOffset.isZero() && - "VTable pointer must come first!"); - AppendField(CharUnits::Zero(), VTableTy->getPointerTo()); - } - - // Layout the non-virtual bases. - for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(), - E = RD->bases_end(); I != E; ++I) { - if (I->isVirtual()) - continue; - - const CXXRecordDecl *BaseDecl = - cast<CXXRecordDecl>(I->getType()->getAs<RecordType>()->getDecl()); - - // We've already laid out the primary base. - if (BaseDecl == PrimaryBase && !Layout.isPrimaryBaseVirtual()) - continue; - - if (!LayoutNonVirtualBase(BaseDecl, Layout.getBaseClassOffset(BaseDecl))) +bool CGRecordLowering::hasOwnStorage(const CXXRecordDecl *Decl, + const CXXRecordDecl *Query) { + const ASTRecordLayout &DeclLayout = Context.getASTRecordLayout(Decl); + if (DeclLayout.isPrimaryBaseVirtual() && DeclLayout.getPrimaryBase() == Query) + return false; + for (const auto &Base : Decl->bases()) + if (!hasOwnStorage(Base.getType()->getAsCXXRecordDecl(), Query)) return false; - } - - // Add a vb-table pointer if the layout insists. - if (Layout.hasOwnVBPtr()) { - CharUnits VBPtrOffset = Layout.getVBPtrOffset(); - llvm::Type *Vbptr = llvm::Type::getInt32PtrTy(Types.getLLVMContext()); - AppendPadding(VBPtrOffset, getTypeAlignment(Vbptr)); - AppendField(VBPtrOffset, Vbptr); - } - - return true; -} - -bool -CGRecordLayoutBuilder::ComputeNonVirtualBaseType(const CXXRecordDecl *RD) { - const ASTRecordLayout &Layout = Types.getContext().getASTRecordLayout(RD); - - CharUnits NonVirtualSize = Layout.getNonVirtualSize(); - CharUnits NonVirtualAlign = Layout.getNonVirtualAlign(); - CharUnits AlignedNonVirtualTypeSize = - NonVirtualSize.RoundUpToAlignment(NonVirtualAlign); - - // First check if we can use the same fields as for the complete class. - CharUnits RecordSize = Layout.getSize(); - if (AlignedNonVirtualTypeSize == RecordSize) - return true; - - // Check if we need padding. - CharUnits AlignedNextFieldOffset = - NextFieldOffset.RoundUpToAlignment(getAlignmentAsLLVMStruct()); - - if (AlignedNextFieldOffset > AlignedNonVirtualTypeSize) { - assert(!Packed && "cannot layout even as packed struct"); - return false; // Needs packing. - } - - bool needsPadding = (AlignedNonVirtualTypeSize != AlignedNextFieldOffset); - if (needsPadding) { - CharUnits NumBytes = AlignedNonVirtualTypeSize - AlignedNextFieldOffset; - FieldTypes.push_back(getByteArrayType(NumBytes)); - } - - BaseSubobjectType = llvm::StructType::create(Types.getLLVMContext(), - FieldTypes, "", Packed); - Types.addRecordTypeName(RD, BaseSubobjectType, ".base"); - - // Pull the padding back off. 
- if (needsPadding) - FieldTypes.pop_back(); - return true; } -bool CGRecordLayoutBuilder::LayoutFields(const RecordDecl *D) { - assert(!D->isUnion() && "Can't call LayoutFields on a union!"); - assert(!Alignment.isZero() && "Did not set alignment!"); - - const ASTRecordLayout &Layout = Types.getContext().getASTRecordLayout(D); - - const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(D); - if (RD) - if (!LayoutNonVirtualBases(RD, Layout)) - return false; - - unsigned FieldNo = 0; - - for (RecordDecl::field_iterator FI = D->field_begin(), FE = D->field_end(); - FI != FE; ++FI, ++FieldNo) { - FieldDecl *FD = *FI; - - // If this field is a bitfield, layout all of the consecutive - // non-zero-length bitfields and the last zero-length bitfield; these will - // all share storage. - if (FD->isBitField()) { - // If all we have is a zero-width bitfield, skip it. - if (FD->getBitWidthValue(Types.getContext()) == 0) +void CGRecordLowering::calculateZeroInit() { + for (std::vector<MemberInfo>::const_iterator Member = Members.begin(), + MemberEnd = Members.end(); + IsZeroInitializableAsBase && Member != MemberEnd; ++Member) { + if (Member->Kind == MemberInfo::Field) { + if (!Member->FD || isZeroInitializable(Member->FD)) continue; - - // Layout this range of bitfields. - if (!LayoutBitfields(Layout, FieldNo, FI, FE)) { - assert(!Packed && - "Could not layout bitfields even with a packed LLVM struct!"); - return false; - } - assert(FI != FE && "Advanced past the last bitfield"); - continue; - } - - if (!LayoutField(FD, Layout.getFieldOffset(FieldNo))) { - assert(!Packed && - "Could not layout fields even with a packed LLVM struct!"); - return false; - } - } - - if (RD) { - // We've laid out the non-virtual bases and the fields, now compute the - // non-virtual base field types. - if (!ComputeNonVirtualBaseType(RD)) { - assert(!Packed && "Could not layout even with a packed LLVM struct!"); - return false; - } - - // Lay out the virtual bases. The MS ABI uses a different - // algorithm here due to the lack of primary virtual bases. - if (Types.getTarget().getCXXABI().hasPrimaryVBases()) { - RD->getIndirectPrimaryBases(IndirectPrimaryBases); - if (Layout.isPrimaryBaseVirtual()) - IndirectPrimaryBases.insert(Layout.getPrimaryBase()); - - if (!LayoutVirtualBases(RD, Layout)) - return false; - } else { - if (!MSLayoutVirtualBases(RD, Layout)) - return false; + IsZeroInitializable = IsZeroInitializableAsBase = false; + } else if (Member->Kind == MemberInfo::Base || + Member->Kind == MemberInfo::VBase) { + if (isZeroInitializable(Member->RD)) + continue; + IsZeroInitializable = false; + if (Member->Kind == MemberInfo::Base) + IsZeroInitializableAsBase = false; } } - - // Append tail padding if necessary. - AppendTailPadding(Layout.getSize()); - - return true; -} - -void CGRecordLayoutBuilder::AppendTailPadding(CharUnits RecordSize) { - ResizeLastBaseFieldIfNecessary(RecordSize); - - assert(NextFieldOffset <= RecordSize && "Size mismatch!"); - - CharUnits AlignedNextFieldOffset = - NextFieldOffset.RoundUpToAlignment(getAlignmentAsLLVMStruct()); - - if (AlignedNextFieldOffset == RecordSize) { - // We don't need any padding. 
- return; - } - - CharUnits NumPadBytes = RecordSize - NextFieldOffset; - AppendBytes(NumPadBytes); -} - -void CGRecordLayoutBuilder::AppendField(CharUnits fieldOffset, - llvm::Type *fieldType) { - CharUnits fieldSize = - CharUnits::fromQuantity(Types.getDataLayout().getTypeAllocSize(fieldType)); - - FieldTypes.push_back(fieldType); - - NextFieldOffset = fieldOffset + fieldSize; } -void CGRecordLayoutBuilder::AppendPadding(CharUnits fieldOffset, - CharUnits fieldAlignment) { - assert(NextFieldOffset <= fieldOffset && - "Incorrect field layout!"); - - // Do nothing if we're already at the right offset. - if (fieldOffset == NextFieldOffset) return; - - // If we're not emitting a packed LLVM type, try to avoid adding - // unnecessary padding fields. - if (!Packed) { - // Round up the field offset to the alignment of the field type. - CharUnits alignedNextFieldOffset = - NextFieldOffset.RoundUpToAlignment(fieldAlignment); - assert(alignedNextFieldOffset <= fieldOffset); - - // If that's the right offset, we're done. - if (alignedNextFieldOffset == fieldOffset) return; +void CGRecordLowering::clipTailPadding() { + std::vector<MemberInfo>::iterator Prior = Members.begin(); + CharUnits Tail = getSize(Prior->Data); + for (std::vector<MemberInfo>::iterator Member = Prior + 1, + MemberEnd = Members.end(); + Member != MemberEnd; ++Member) { + // Only members with data and the scissor can cut into tail padding. + if (!Member->Data && Member->Kind != MemberInfo::Scissor) + continue; + if (Member->Offset < Tail) { + assert(Prior->Kind == MemberInfo::Field && !Prior->FD && + "Only storage fields have tail padding!"); + Prior->Data = getByteArrayType(bitsToCharUnits(llvm::RoundUpToAlignment( + cast<llvm::IntegerType>(Prior->Data)->getIntegerBitWidth(), 8))); + } + if (Member->Data) + Prior = Member; + Tail = Prior->Offset + getSize(Prior->Data); } - - // Otherwise we need explicit padding. - CharUnits padding = fieldOffset - NextFieldOffset; - AppendBytes(padding); -} - -bool CGRecordLayoutBuilder::ResizeLastBaseFieldIfNecessary(CharUnits offset) { - // Check if we have a base to resize. - if (!LastLaidOutBase.isValid()) - return false; - - // This offset does not overlap with the tail padding. - if (offset >= NextFieldOffset) - return false; - - // Restore the field offset and append an i8 array instead. - FieldTypes.pop_back(); - NextFieldOffset = LastLaidOutBase.Offset; - AppendBytes(LastLaidOutBase.NonVirtualSize); - LastLaidOutBase.invalidate(); - - return true; } -llvm::Type *CGRecordLayoutBuilder::getByteArrayType(CharUnits numBytes) { - assert(!numBytes.isZero() && "Empty byte arrays aren't allowed."); - - llvm::Type *Ty = llvm::Type::getInt8Ty(Types.getLLVMContext()); - if (numBytes > CharUnits::One()) - Ty = llvm::ArrayType::get(Ty, numBytes.getQuantity()); - - return Ty; +void CGRecordLowering::determinePacked() { + CharUnits Alignment = CharUnits::One(); + for (std::vector<MemberInfo>::const_iterator Member = Members.begin(), + MemberEnd = Members.end(); + Member != MemberEnd; ++Member) { + if (!Member->Data) + continue; + // If any member falls at an offset that it not a multiple of its alignment, + // then the entire record must be packed. + if (Member->Offset % getAlignment(Member->Data)) + Packed = true; + Alignment = std::max(Alignment, getAlignment(Member->Data)); + } + // If the size of the record (the capstone's offset) is not a multiple of the + // record's alignment, it must be packed. 
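  // Worked example (illustrative, hypothetical struct): for
  //
  //   struct __attribute__((packed)) P { int i; char c; };
  //
  // 'i' (i32, ABI alignment 4) lies at offset 0 and 'c' at offset 4, so the
  // member loop above finds nothing misaligned, but the record size is 5.
  // The capstone therefore sits at offset 5, 5 % 4 != 0, and the generated
  // LLVM struct must be packed.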
+ if (Members.back().Offset % Alignment) + Packed = true; + // Update the alignment of the sentinal. + if (!Packed) + Members.back().Data = getIntNType(Context.toBits(Alignment)); } -void CGRecordLayoutBuilder::AppendBytes(CharUnits numBytes) { - if (numBytes.isZero()) +void CGRecordLowering::insertPadding() { + std::vector<std::pair<CharUnits, CharUnits> > Padding; + CharUnits Size = CharUnits::Zero(); + for (std::vector<MemberInfo>::const_iterator Member = Members.begin(), + MemberEnd = Members.end(); + Member != MemberEnd; ++Member) { + if (!Member->Data) + continue; + CharUnits Offset = Member->Offset; + assert(Offset >= Size); + // Insert padding if we need to. + if (Offset != Size.RoundUpToAlignment(Packed ? CharUnits::One() : + getAlignment(Member->Data))) + Padding.push_back(std::make_pair(Size, Offset - Size)); + Size = Offset + getSize(Member->Data); + } + if (Padding.empty()) return; - - // Append the padding field - AppendField(NextFieldOffset, getByteArrayType(numBytes)); + // Add the padding to the Members list and sort it. + for (std::vector<std::pair<CharUnits, CharUnits> >::const_iterator + Pad = Padding.begin(), PadEnd = Padding.end(); + Pad != PadEnd; ++Pad) + Members.push_back(StorageInfo(Pad->first, getByteArrayType(Pad->second))); + std::stable_sort(Members.begin(), Members.end()); } -CharUnits CGRecordLayoutBuilder::getTypeAlignment(llvm::Type *Ty) const { - if (Packed) - return CharUnits::One(); - - return CharUnits::fromQuantity(Types.getDataLayout().getABITypeAlignment(Ty)); -} - -CharUnits CGRecordLayoutBuilder::getAlignmentAsLLVMStruct() const { - if (Packed) - return CharUnits::One(); - - CharUnits maxAlignment = CharUnits::One(); - for (size_t i = 0; i != FieldTypes.size(); ++i) - maxAlignment = std::max(maxAlignment, getTypeAlignment(FieldTypes[i])); - - return maxAlignment; +void CGRecordLowering::fillOutputFields() { + for (std::vector<MemberInfo>::const_iterator Member = Members.begin(), + MemberEnd = Members.end(); + Member != MemberEnd; ++Member) { + if (Member->Data) + FieldTypes.push_back(Member->Data); + if (Member->Kind == MemberInfo::Field) { + if (Member->FD) + Fields[Member->FD->getCanonicalDecl()] = FieldTypes.size() - 1; + // A field without storage must be a bitfield. + if (!Member->Data) + setBitFieldInfo(Member->FD, Member->Offset, FieldTypes.back()); + } else if (Member->Kind == MemberInfo::Base) + NonVirtualBases[Member->RD] = FieldTypes.size() - 1; + else if (Member->Kind == MemberInfo::VBase) + VirtualBases[Member->RD] = FieldTypes.size() - 1; + } } -/// Merge in whether a field of the given type is zero-initializable. -void CGRecordLayoutBuilder::CheckZeroInitializable(QualType T) { - // This record already contains a member pointer. - if (!IsZeroInitializableAsBase) - return; +CGBitFieldInfo CGBitFieldInfo::MakeInfo(CodeGenTypes &Types, + const FieldDecl *FD, + uint64_t Offset, uint64_t Size, + uint64_t StorageSize, + uint64_t StorageAlignment) { + // This function is vestigial from CGRecordLayoutBuilder days but is still + // used in GCObjCRuntime.cpp. That usage has a "fixme" attached to it that + // when addressed will allow for the removal of this function. + llvm::Type *Ty = Types.ConvertTypeForMem(FD->getType()); + CharUnits TypeSizeInBytes = + CharUnits::fromQuantity(Types.getDataLayout().getTypeAllocSize(Ty)); + uint64_t TypeSizeInBits = Types.getContext().toBits(TypeSizeInBytes); - // Can only have member pointers if we're compiling C++. 
- if (!Types.getContext().getLangOpts().CPlusPlus) - return; + bool IsSigned = FD->getType()->isSignedIntegerOrEnumerationType(); - const Type *elementType = T->getBaseElementTypeUnsafe(); + if (Size > TypeSizeInBits) { + // We have a wide bit-field. The extra bits are only used for padding, so + // if we have a bitfield of type T, with size N: + // + // T t : N; + // + // We can just assume that it's: + // + // T t : sizeof(T); + // + Size = TypeSizeInBits; + } - if (const MemberPointerType *MPT = elementType->getAs<MemberPointerType>()) { - if (!Types.getCXXABI().isZeroInitializable(MPT)) - IsZeroInitializable = IsZeroInitializableAsBase = false; - } else if (const RecordType *RT = elementType->getAs<RecordType>()) { - const CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl()); - const CGRecordLayout &Layout = Types.getCGRecordLayout(RD); - if (!Layout.isZeroInitializable()) - IsZeroInitializable = IsZeroInitializableAsBase = false; + // Reverse the bit offsets for big endian machines. Because we represent + // a bitfield as a single large integer load, we can imagine the bits + // counting from the most-significant-bit instead of the + // least-significant-bit. + if (Types.getDataLayout().isBigEndian()) { + Offset = StorageSize - (Offset + Size); } + + return CGBitFieldInfo(Offset, Size, IsSigned, StorageSize, StorageAlignment); } CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D, llvm::StructType *Ty) { - CGRecordLayoutBuilder Builder(*this); + CGRecordLowering Builder(*this, D); - Builder.Layout(D); - - Ty->setBody(Builder.FieldTypes, Builder.Packed); + Builder.lower(false); // If we're in C++, compute the base subobject type. - llvm::StructType *BaseTy = 0; - if (isa<CXXRecordDecl>(D) && !D->isUnion()) { - BaseTy = Builder.BaseSubobjectType; - if (!BaseTy) BaseTy = Ty; + llvm::StructType *BaseTy = nullptr; + if (isa<CXXRecordDecl>(D) && !D->isUnion() && !D->hasAttr<FinalAttr>()) { + BaseTy = Ty; + if (Builder.Layout.getNonVirtualSize() != Builder.Layout.getSize()) { + CGRecordLowering BaseBuilder(*this, D); + BaseBuilder.lower(true); + BaseTy = llvm::StructType::create( + getLLVMContext(), BaseBuilder.FieldTypes, "", BaseBuilder.Packed); + addRecordTypeName(D, BaseTy, ".base"); + } } + // Fill in the struct *after* computing the base type. Filling in the body + // signifies that the type is no longer opaque and record layout is complete, + // but we may need to recursively layout D while laying D out as a base type. 
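  // Illustrative note: when the non-virtual size differs from the complete
  // size (for instance, a class with virtual bases), two LLVM types are
  // produced here: the complete-object type for D and a shorter base
  // subobject type named with the ".base" suffix, which is what derived
  // classes embed when D is used as a base.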
+ Ty->setBody(Builder.FieldTypes, Builder.Packed); + CGRecordLayout *RL = new CGRecordLayout(Ty, BaseTy, Builder.IsZeroInitializable, - Builder.IsZeroInitializableAsBase); + Builder.IsZeroInitializableAsBase); RL->NonVirtualBases.swap(Builder.NonVirtualBases); RL->CompleteObjectVirtualBases.swap(Builder.VirtualBases); @@ -994,12 +695,9 @@ CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D, if (BaseTy) { CharUnits NonVirtualSize = Layout.getNonVirtualSize(); - CharUnits NonVirtualAlign = Layout.getNonVirtualAlign(); - CharUnits AlignedNonVirtualTypeSize = - NonVirtualSize.RoundUpToAlignment(NonVirtualAlign); uint64_t AlignedNonVirtualTypeSizeInBits = - getContext().toBits(AlignedNonVirtualTypeSize); + getContext().toBits(NonVirtualSize); assert(AlignedNonVirtualTypeSizeInBits == getDataLayout().getTypeAllocSizeInBits(BaseTy) && diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp index 0bc51ddb5178..c2b64a7662b6 100644 --- a/lib/CodeGen/CGStmt.cpp +++ b/lib/CodeGen/CGStmt.cpp @@ -16,14 +16,15 @@ #include "CodeGenModule.h" #include "TargetInfo.h" #include "clang/AST/StmtVisitor.h" -#include "clang/Sema/SemaDiagnostic.h" #include "clang/Basic/PrettyStackTrace.h" #include "clang/Basic/TargetInfo.h" +#include "clang/Sema/LoopHint.h" +#include "clang/Sema/SemaDiagnostic.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/Support/CallSite.h" using namespace clang; using namespace CodeGen; @@ -43,6 +44,7 @@ void CodeGenFunction::EmitStopPoint(const Stmt *S) { void CodeGenFunction::EmitStmt(const Stmt *S) { assert(S && "Null statement?"); + PGO.setCurrentStmt(S); // These statements have their own debug info handling. 
if (EmitSimpleStmt(S)) @@ -75,7 +77,6 @@ void CodeGenFunction::EmitStmt(const Stmt *S) { case Stmt::SEHExceptStmtClass: case Stmt::SEHFinallyStmtClass: case Stmt::MSDependentExistsStmtClass: - case Stmt::OMPParallelDirectiveClass: llvm_unreachable("invalid statement class to emit generically"); case Stmt::NullStmtClass: case Stmt::CompoundStmtClass: @@ -172,6 +173,54 @@ void CodeGenFunction::EmitStmt(const Stmt *S) { case Stmt::SEHTryStmtClass: EmitSEHTryStmt(cast<SEHTryStmt>(*S)); break; + case Stmt::SEHLeaveStmtClass: + EmitSEHLeaveStmt(cast<SEHLeaveStmt>(*S)); + break; + case Stmt::OMPParallelDirectiveClass: + EmitOMPParallelDirective(cast<OMPParallelDirective>(*S)); + break; + case Stmt::OMPSimdDirectiveClass: + EmitOMPSimdDirective(cast<OMPSimdDirective>(*S)); + break; + case Stmt::OMPForDirectiveClass: + EmitOMPForDirective(cast<OMPForDirective>(*S)); + break; + case Stmt::OMPSectionsDirectiveClass: + EmitOMPSectionsDirective(cast<OMPSectionsDirective>(*S)); + break; + case Stmt::OMPSectionDirectiveClass: + EmitOMPSectionDirective(cast<OMPSectionDirective>(*S)); + break; + case Stmt::OMPSingleDirectiveClass: + EmitOMPSingleDirective(cast<OMPSingleDirective>(*S)); + break; + case Stmt::OMPMasterDirectiveClass: + EmitOMPMasterDirective(cast<OMPMasterDirective>(*S)); + break; + case Stmt::OMPCriticalDirectiveClass: + EmitOMPCriticalDirective(cast<OMPCriticalDirective>(*S)); + break; + case Stmt::OMPParallelForDirectiveClass: + EmitOMPParallelForDirective(cast<OMPParallelForDirective>(*S)); + break; + case Stmt::OMPParallelSectionsDirectiveClass: + EmitOMPParallelSectionsDirective(cast<OMPParallelSectionsDirective>(*S)); + break; + case Stmt::OMPTaskDirectiveClass: + EmitOMPTaskDirective(cast<OMPTaskDirective>(*S)); + break; + case Stmt::OMPTaskyieldDirectiveClass: + EmitOMPTaskyieldDirective(cast<OMPTaskyieldDirective>(*S)); + break; + case Stmt::OMPBarrierDirectiveClass: + EmitOMPBarrierDirective(cast<OMPBarrierDirective>(*S)); + break; + case Stmt::OMPTaskwaitDirectiveClass: + EmitOMPTaskwaitDirective(cast<OMPTaskwaitDirective>(*S)); + break; + case Stmt::OMPFlushDirectiveClass: + EmitOMPFlushDirective(cast<OMPFlushDirective>(*S)); + break; } } @@ -217,7 +266,7 @@ CodeGenFunction::EmitCompoundStmtWithoutScope(const CompoundStmt &S, E = S.body_end()-GetLast; I != E; ++I) EmitStmt(*I); - llvm::Value *RetAlloca = 0; + llvm::Value *RetAlloca = nullptr; if (GetLast) { // We have to special case labels here. 
They are statements, but when put // at the end of a statement expression, they yield the value of their @@ -242,7 +291,7 @@ CodeGenFunction::EmitCompoundStmtWithoutScope(const CompoundStmt &S, EmitAnyExprToMem(cast<Expr>(LastStmt), RetAlloca, Qualifiers(), /*IsInit*/false); } - + } return RetAlloca; @@ -309,9 +358,8 @@ void CodeGenFunction::EmitBranch(llvm::BasicBlock *Target) { void CodeGenFunction::EmitBlockAfterUses(llvm::BasicBlock *block) { bool inserted = false; - for (llvm::BasicBlock::use_iterator - i = block->use_begin(), e = block->use_end(); i != e; ++i) { - if (llvm::Instruction *insn = dyn_cast<llvm::Instruction>(*i)) { + for (llvm::User *u : block->users()) { + if (llvm::Instruction *insn = dyn_cast<llvm::Instruction>(u)) { CurFn->getBasicBlockList().insertAfter(insn->getParent(), block); inserted = true; break; @@ -358,7 +406,9 @@ void CodeGenFunction::EmitLabel(const LabelDecl *D) { ResolveBranchFixups(Dest.getBlock()); } + RegionCounter Cnt = getPGORegionCounter(D->getStmt()); EmitBlock(Dest.getBlock()); + Cnt.beginRegion(Builder); } /// Change the cleanup scope of the labels in this lexical scope to @@ -391,7 +441,23 @@ void CodeGenFunction::EmitLabelStmt(const LabelStmt &S) { } void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) { - EmitStmt(S.getSubStmt()); + const Stmt *SubStmt = S.getSubStmt(); + switch (SubStmt->getStmtClass()) { + case Stmt::DoStmtClass: + EmitDoStmt(cast<DoStmt>(*SubStmt), S.getAttrs()); + break; + case Stmt::ForStmtClass: + EmitForStmt(cast<ForStmt>(*SubStmt), S.getAttrs()); + break; + case Stmt::WhileStmtClass: + EmitWhileStmt(cast<WhileStmt>(*SubStmt), S.getAttrs()); + break; + case Stmt::CXXForRangeStmtClass: + EmitCXXForRangeStmt(cast<CXXForRangeStmt>(*SubStmt), S.getAttrs()); + break; + default: + EmitStmt(SubStmt); + } } void CodeGenFunction::EmitGotoStmt(const GotoStmt &S) { @@ -429,7 +495,8 @@ void CodeGenFunction::EmitIndirectGotoStmt(const IndirectGotoStmt &S) { void CodeGenFunction::EmitIfStmt(const IfStmt &S) { // C99 6.8.4.1: The first substatement is executed if the expression compares // unequal to 0. The condition must be a scalar type. - LexicalScope ConditionScope(*this, S.getSourceRange()); + LexicalScope ConditionScope(*this, S.getCond()->getSourceRange()); + RegionCounter Cnt = getPGORegionCounter(&S); if (S.getConditionVariable()) EmitAutoVarDecl(*S.getConditionVariable()); @@ -447,6 +514,8 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) { // If the skipped block has no labels in it, just emit the executed block. // This avoids emitting dead code and simplifies the CFG substantially. if (!ContainsLabel(Skipped)) { + if (CondConstant) + Cnt.beginRegion(Builder); if (Executed) { RunCleanupsScope ExecutedScope(*this); EmitStmt(Executed); @@ -462,10 +531,12 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) { llvm::BasicBlock *ElseBlock = ContBlock; if (S.getElse()) ElseBlock = createBasicBlock("if.else"); - EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock); + + EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, Cnt.getCount()); // Emit the 'then' code. - EmitBlock(ThenBlock); + EmitBlock(ThenBlock); + Cnt.beginRegion(Builder); { RunCleanupsScope ThenScope(*this); EmitStmt(S.getThen()); @@ -474,30 +545,121 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) { // Emit the 'else' code if present. if (const Stmt *Else = S.getElse()) { - // There is no need to emit line number for unconditional branch. 
- if (getDebugInfo()) - Builder.SetCurrentDebugLocation(llvm::DebugLoc()); - EmitBlock(ElseBlock); + { + // There is no need to emit line number for unconditional branch. + SuppressDebugLocation S(Builder); + EmitBlock(ElseBlock); + } { RunCleanupsScope ElseScope(*this); EmitStmt(Else); } - // There is no need to emit line number for unconditional branch. - if (getDebugInfo()) - Builder.SetCurrentDebugLocation(llvm::DebugLoc()); - EmitBranch(ContBlock); + { + // There is no need to emit line number for unconditional branch. + SuppressDebugLocation S(Builder); + EmitBranch(ContBlock); + } } // Emit the continuation block for code after the if. EmitBlock(ContBlock, true); } -void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) { +void CodeGenFunction::EmitCondBrHints(llvm::LLVMContext &Context, + llvm::BranchInst *CondBr, + const ArrayRef<const Attr *> &Attrs) { + // Return if there are no hints. + if (Attrs.empty()) + return; + + // Add vectorize and unroll hints to the metadata on the conditional branch. + SmallVector<llvm::Value *, 2> Metadata(1); + for (const auto *Attr : Attrs) { + const LoopHintAttr *LH = dyn_cast<LoopHintAttr>(Attr); + + // Skip non loop hint attributes + if (!LH) + continue; + + LoopHintAttr::OptionType Option = LH->getOption(); + int ValueInt = LH->getValue(); + + const char *MetadataName; + switch (Option) { + case LoopHintAttr::Vectorize: + case LoopHintAttr::VectorizeWidth: + MetadataName = "llvm.loop.vectorize.width"; + break; + case LoopHintAttr::Interleave: + case LoopHintAttr::InterleaveCount: + MetadataName = "llvm.loop.interleave.count"; + break; + case LoopHintAttr::Unroll: + MetadataName = "llvm.loop.unroll.enable"; + break; + case LoopHintAttr::UnrollCount: + MetadataName = "llvm.loop.unroll.count"; + break; + } + + llvm::Value *Value; + llvm::MDString *Name; + switch (Option) { + case LoopHintAttr::Vectorize: + case LoopHintAttr::Interleave: + if (ValueInt == 1) { + // FIXME: In the future I will modifiy the behavior of the metadata + // so we can enable/disable vectorization and interleaving separately. + Name = llvm::MDString::get(Context, "llvm.loop.vectorize.enable"); + Value = Builder.getTrue(); + break; + } + // Vectorization/interleaving is disabled, set width/count to 1. + ValueInt = 1; + // Fallthrough. + case LoopHintAttr::VectorizeWidth: + case LoopHintAttr::InterleaveCount: + Name = llvm::MDString::get(Context, MetadataName); + Value = llvm::ConstantInt::get(Int32Ty, ValueInt); + break; + case LoopHintAttr::Unroll: + Name = llvm::MDString::get(Context, MetadataName); + Value = (ValueInt == 0) ? Builder.getFalse() : Builder.getTrue(); + break; + case LoopHintAttr::UnrollCount: + Name = llvm::MDString::get(Context, MetadataName); + Value = llvm::ConstantInt::get(Int32Ty, ValueInt); + break; + } + + SmallVector<llvm::Value *, 2> OpValues; + OpValues.push_back(Name); + OpValues.push_back(Value); + + // Set or overwrite metadata indicated by Name. + Metadata.push_back(llvm::MDNode::get(Context, OpValues)); + } + + if (!Metadata.empty()) { + // Add llvm.loop MDNode to CondBr. + llvm::MDNode *LoopID = llvm::MDNode::get(Context, Metadata); + LoopID->replaceOperandWith(0, LoopID); // First op points to itself. + + CondBr->setMetadata("llvm.loop", LoopID); + } +} + +void CodeGenFunction::EmitWhileStmt(const WhileStmt &S, + const ArrayRef<const Attr *> &WhileAttrs) { + RegionCounter Cnt = getPGORegionCounter(&S); + // Emit the header for the loop, which will also become // the continue target. 
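  // Sketch of the emitted control flow (illustrative): "while.cond" serves as
  // both the loop header and the 'continue' target, the body gets its own
  // block, and "while.end" is the 'break'/exit target.  The conditional
  // branch out of the header carries PGO loop weights and, through
  // EmitCondBrHints, any llvm.loop metadata requested by loop hints; for
  // example a vectorize_width(4) hint becomes "llvm.loop.vectorize.width"
  // with an i32 value of 4.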
JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond"); EmitBlock(LoopHeader.getBlock()); + LoopStack.push(LoopHeader.getBlock()); + // Create an exit block for when the condition fails, which will // also become the break target. JumpDest LoopExit = getJumpDestInCurrentScope("while.end"); @@ -535,13 +697,17 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) { llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); if (ConditionScope.requiresCleanups()) ExitBlock = createBasicBlock("while.exit"); - - Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); + llvm::BranchInst *CondBr = + Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock, + PGO.createLoopWeights(S.getCond(), Cnt)); if (ExitBlock != LoopExit.getBlock()) { EmitBlock(ExitBlock); EmitBranchThroughCleanup(LoopExit); } + + // Attach metadata to loop body conditional branch. + EmitCondBrHints(LoopBody->getContext(), CondBr, WhileAttrs); } // Emit the loop body. We have to emit this in a cleanup scope @@ -549,6 +715,7 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) { { RunCleanupsScope BodyScope(*this); EmitBlock(LoopBody); + Cnt.beginRegion(Builder); EmitStmt(S.getBody()); } @@ -560,6 +727,8 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) { // Branch to the loop header again. EmitBranch(LoopHeader.getBlock()); + LoopStack.pop(); + // Emit the exit block. EmitBlock(LoopExit.getBlock(), true); @@ -569,23 +738,27 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) { SimplifyForwardingBlocks(LoopHeader.getBlock()); } -void CodeGenFunction::EmitDoStmt(const DoStmt &S) { +void CodeGenFunction::EmitDoStmt(const DoStmt &S, + const ArrayRef<const Attr *> &DoAttrs) { JumpDest LoopExit = getJumpDestInCurrentScope("do.end"); JumpDest LoopCond = getJumpDestInCurrentScope("do.cond"); + RegionCounter Cnt = getPGORegionCounter(&S); + // Store the blocks to use for break and continue. BreakContinueStack.push_back(BreakContinue(LoopExit, LoopCond)); // Emit the body of the loop. llvm::BasicBlock *LoopBody = createBasicBlock("do.body"); - EmitBlock(LoopBody); + + LoopStack.push(LoopBody); + + EmitBlockWithFallThrough(LoopBody, Cnt); { RunCleanupsScope BodyScope(*this); EmitStmt(S.getBody()); } - BreakContinueStack.pop_back(); - EmitBlock(LoopCond.getBlock()); // C99 6.8.5.2: "The evaluation of the controlling expression takes place @@ -596,6 +769,8 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S) { // compares unequal to 0. The condition must be a scalar type. llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond()); + BreakContinueStack.pop_back(); + // "do {} while (0)" is common in macros, avoid extra blocks. Be sure // to correctly handle break/continue though. bool EmitBoolCondBranch = true; @@ -604,8 +779,16 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S) { EmitBoolCondBranch = false; // As long as the condition is true, iterate the loop. - if (EmitBoolCondBranch) - Builder.CreateCondBr(BoolCondVal, LoopBody, LoopExit.getBlock()); + if (EmitBoolCondBranch) { + llvm::BranchInst *CondBr = + Builder.CreateCondBr(BoolCondVal, LoopBody, LoopExit.getBlock(), + PGO.createLoopWeights(S.getCond(), Cnt)); + + // Attach metadata to loop body conditional branch. + EmitCondBrHints(LoopBody->getContext(), CondBr, DoAttrs); + } + + LoopStack.pop(); // Emit the exit block. 
EmitBlock(LoopExit.getBlock()); @@ -616,7 +799,8 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S) { SimplifyForwardingBlocks(LoopCond.getBlock()); } -void CodeGenFunction::EmitForStmt(const ForStmt &S) { +void CodeGenFunction::EmitForStmt(const ForStmt &S, + const ArrayRef<const Attr *> &ForAttrs) { JumpDest LoopExit = getJumpDestInCurrentScope("for.end"); RunCleanupsScope ForScope(*this); @@ -629,6 +813,8 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) { if (S.getInit()) EmitStmt(S.getInit()); + RegionCounter Cnt = getPGORegionCounter(&S); + // Start the loop with a block that tests the condition. // If there's an increment, the continue scope will be overwritten // later. @@ -636,6 +822,18 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) { llvm::BasicBlock *CondBlock = Continue.getBlock(); EmitBlock(CondBlock); + LoopStack.push(CondBlock); + + // If the for loop doesn't have an increment we can just use the + // condition as the continue block. Otherwise we'll need to create + // a block for it (in the current scope, i.e. in the scope of the + // condition), and that we will become our continue block. + if (S.getInc()) + Continue = getJumpDestInCurrentScope("for.inc"); + + // Store the blocks to use for break and continue. + BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); + // Create a cleanup scope for the condition variable cleanups. RunCleanupsScope ConditionScope(*this); @@ -657,7 +855,13 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) { // C99 6.8.5p2/p4: The first substatement is executed if the expression // compares unequal to 0. The condition must be a scalar type. - EmitBranchOnBoolExpr(S.getCond(), ForBody, ExitBlock); + llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond()); + llvm::BranchInst *CondBr = + Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock, + PGO.createLoopWeights(S.getCond(), Cnt)); + + // Attach metadata to loop body conditional branch. + EmitCondBrHints(ForBody->getContext(), CondBr, ForAttrs); if (ExitBlock != LoopExit.getBlock()) { EmitBlock(ExitBlock); @@ -669,16 +873,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) { // Treat it as a non-zero constant. Don't even create a new block for the // body, just fall into it. } - - // If the for loop doesn't have an increment we can just use the - // condition as the continue block. Otherwise we'll need to create - // a block for it (in the current scope, i.e. in the scope of the - // condition), and that we will become our continue block. - if (S.getInc()) - Continue = getJumpDestInCurrentScope("for.inc"); - - // Store the blocks to use for break and continue. - BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); + Cnt.beginRegion(Builder); { // Create a separate cleanup scope for the body, in case it is not @@ -703,11 +898,15 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) { if (DI) DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd()); + LoopStack.pop(); + // Emit the fall-through block. 
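  // Recap of the blocks used for a for-statement (illustrative): "for.cond"
  // tests the condition, the body has its own block, "for.inc" is the
  // 'continue' target when an increment expression exists (otherwise
  // 'continue' branches back to the condition), and "for.end" is the
  // 'break' target emitted as the fall-through block below.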
EmitBlock(LoopExit.getBlock(), true); } -void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) { +void +CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S, + const ArrayRef<const Attr *> &ForAttrs) { JumpDest LoopExit = getJumpDestInCurrentScope("for.end"); RunCleanupsScope ForScope(*this); @@ -720,12 +919,16 @@ void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) { EmitStmt(S.getRangeStmt()); EmitStmt(S.getBeginEndStmt()); + RegionCounter Cnt = getPGORegionCounter(&S); + // Start the loop with a block that tests the condition. // If there's an increment, the continue scope will be overwritten // later. llvm::BasicBlock *CondBlock = createBasicBlock("for.cond"); EmitBlock(CondBlock); + LoopStack.push(CondBlock); + // If there are any cleanups between here and the loop-exit scope, // create a block to stage a loop exit along. llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); @@ -737,7 +940,12 @@ void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) { // The body is executed if the expression, contextually converted // to bool, is true. - EmitBranchOnBoolExpr(S.getCond(), ForBody, ExitBlock); + llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond()); + llvm::BranchInst *CondBr = Builder.CreateCondBr( + BoolCondVal, ForBody, ExitBlock, PGO.createLoopWeights(S.getCond(), Cnt)); + + // Attach metadata to loop body conditional branch. + EmitCondBrHints(ForBody->getContext(), CondBr, ForAttrs); if (ExitBlock != LoopExit.getBlock()) { EmitBlock(ExitBlock); @@ -745,6 +953,7 @@ void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) { } EmitBlock(ForBody); + Cnt.beginRegion(Builder); // Create a block for the increment. In case of a 'continue', we jump there. JumpDest Continue = getJumpDestInCurrentScope("for.inc"); @@ -772,6 +981,8 @@ void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) { if (DI) DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd()); + LoopStack.pop(); + // Emit the fall-through block. EmitBlock(LoopExit.getBlock(), true); } @@ -809,7 +1020,8 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { // FIXME: Clean this up by using an LValue for ReturnTemp, // EmitStoreThroughLValue, and EmitAnyExpr. - if (S.getNRVOCandidate() && S.getNRVOCandidate()->isNRVOVariable()) { + if (getLangOpts().ElideConstructors && + S.getNRVOCandidate() && S.getNRVOCandidate()->isNRVOVariable()) { // Apply the named return value optimization for this return statement, // which means doing nothing: the appropriate result has already been // constructed into the NRVO variable. @@ -818,12 +1030,12 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { // that the cleanup code should not destroy the variable. if (llvm::Value *NRVOFlag = NRVOFlags[S.getNRVOCandidate()]) Builder.CreateStore(Builder.getTrue(), NRVOFlag); - } else if (!ReturnValue) { + } else if (!ReturnValue || (RV && RV->getType()->isVoidType())) { // Make sure not to return anything, but evaluate the expression // for side effects. 
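  // For instance (illustrative): in 'void g(); void f() { return g(); }' the
  // returned expression has void type, so g() is evaluated purely for its
  // side effects and nothing is stored to a return slot.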
if (RV) EmitAnyExpr(RV); - } else if (RV == 0) { + } else if (!RV) { // Do nothing (return value is left uninitialized) } else if (FnRetTy->isReferenceType()) { // If this function returns a reference, take the address of the expression @@ -853,7 +1065,7 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { } ++NumReturnExprs; - if (RV == 0 || RV->isEvaluatable(getContext())) + if (!RV || RV->isEvaluatable(getContext())) ++NumSimpleReturnExprs; cleanupScope.ForceCleanup(); @@ -866,9 +1078,8 @@ void CodeGenFunction::EmitDeclStmt(const DeclStmt &S) { if (HaveInsertPoint()) EmitStopPoint(&S); - for (DeclStmt::const_decl_iterator I = S.decl_begin(), E = S.decl_end(); - I != E; ++I) - EmitDecl(**I); + for (const auto *I : S.decls()) + EmitDecl(*I); } void CodeGenFunction::EmitBreakStmt(const BreakStmt &S) { @@ -880,8 +1091,7 @@ void CodeGenFunction::EmitBreakStmt(const BreakStmt &S) { if (HaveInsertPoint()) EmitStopPoint(&S); - JumpDest Block = BreakContinueStack.back().BreakBlock; - EmitBranchThroughCleanup(Block); + EmitBranchThroughCleanup(BreakContinueStack.back().BreakBlock); } void CodeGenFunction::EmitContinueStmt(const ContinueStmt &S) { @@ -893,8 +1103,7 @@ void CodeGenFunction::EmitContinueStmt(const ContinueStmt &S) { if (HaveInsertPoint()) EmitStopPoint(&S); - JumpDest Block = BreakContinueStack.back().ContinueBlock; - EmitBranchThroughCleanup(Block); + EmitBranchThroughCleanup(BreakContinueStack.back().ContinueBlock); } /// EmitCaseStmtRange - If case statement range is not too big then @@ -906,11 +1115,13 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) { llvm::APSInt LHS = S.getLHS()->EvaluateKnownConstInt(getContext()); llvm::APSInt RHS = S.getRHS()->EvaluateKnownConstInt(getContext()); + RegionCounter CaseCnt = getPGORegionCounter(&S); + // Emit the code for this case. We do this first to make sure it is // properly chained from our predecessor before generating the // switch machinery to enter this block. - EmitBlock(createBasicBlock("sw.bb")); - llvm::BasicBlock *CaseDest = Builder.GetInsertBlock(); + llvm::BasicBlock *CaseDest = createBasicBlock("sw.bb"); + EmitBlockWithFallThrough(CaseDest, CaseCnt); EmitStmt(S.getSubStmt()); // If range is empty, do nothing. @@ -921,7 +1132,18 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) { // FIXME: parameters such as this should not be hardcoded. if (Range.ult(llvm::APInt(Range.getBitWidth(), 64))) { // Range is small enough to add multiple switch instruction cases. - for (unsigned i = 0, e = Range.getZExtValue() + 1; i != e; ++i) { + uint64_t Total = CaseCnt.getCount(); + unsigned NCases = Range.getZExtValue() + 1; + // We only have one region counter for the entire set of cases here, so we + // need to divide the weights evenly between the generated cases, ensuring + // that the total weight is preserved. E.g., a weight of 5 over three cases + // will be distributed as weights of 2, 2, and 1. + uint64_t Weight = Total / NCases, Rem = Total % NCases; + for (unsigned I = 0; I != NCases; ++I) { + if (SwitchWeights) + SwitchWeights->push_back(Weight + (Rem ? 
1 : 0)); + if (Rem) + Rem--; SwitchInsn->addCase(Builder.getInt(LHS), CaseDest); LHS++; } @@ -946,7 +1168,19 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) { Builder.CreateSub(SwitchInsn->getCondition(), Builder.getInt(LHS)); llvm::Value *Cond = Builder.CreateICmpULE(Diff, Builder.getInt(Range), "inbounds"); - Builder.CreateCondBr(Cond, CaseDest, FalseDest); + + llvm::MDNode *Weights = nullptr; + if (SwitchWeights) { + uint64_t ThisCount = CaseCnt.getCount(); + uint64_t DefaultCount = (*SwitchWeights)[0]; + Weights = PGO.createBranchWeights(ThisCount, DefaultCount); + + // Since we're chaining the switch default through each large case range, we + // need to update the weight for the default, ie, the first case, to include + // this case. + (*SwitchWeights)[0] += ThisCount; + } + Builder.CreateCondBr(Cond, CaseDest, FalseDest, Weights); // Restore the appropriate insertion point. if (RestoreBB) @@ -959,7 +1193,7 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) { // If there is no enclosing switch instance that we're aware of, then this // case statement and its block can be elided. This situation only happens // when we've constant-folded the switch, are emitting the constant case, - // and part of the constant case includes another case statement. For + // and part of the constant case includes another case statement. For // instance: switch (4) { case 4: do { case 5: } while (1); } if (!SwitchInsn) { EmitStmt(S.getSubStmt()); @@ -972,17 +1206,22 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) { return; } + RegionCounter CaseCnt = getPGORegionCounter(&S); llvm::ConstantInt *CaseVal = Builder.getInt(S.getLHS()->EvaluateKnownConstInt(getContext())); - // If the body of the case is just a 'break', and if there was no fallthrough, - // try to not emit an empty block. - if ((CGM.getCodeGenOpts().OptimizationLevel > 0) && + // If the body of the case is just a 'break', try to not emit an empty block. + // If we're profiling or we're not optimizing, leave the block in for better + // debug and coverage analysis. + if (!CGM.getCodeGenOpts().ProfileInstrGenerate && + CGM.getCodeGenOpts().OptimizationLevel > 0 && isa<BreakStmt>(S.getSubStmt())) { JumpDest Block = BreakContinueStack.back().BreakBlock; // Only do this optimization if there are no cleanups that need emitting. if (isObviouslyBranchWithoutCleanups(Block)) { + if (SwitchWeights) + SwitchWeights->push_back(CaseCnt.getCount()); SwitchInsn->addCase(CaseVal, Block.getBlock()); // If there was a fallthrough into this case, make sure to redirect it to @@ -995,8 +1234,10 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) { } } - EmitBlock(createBasicBlock("sw.bb")); - llvm::BasicBlock *CaseDest = Builder.GetInsertBlock(); + llvm::BasicBlock *CaseDest = createBasicBlock("sw.bb"); + EmitBlockWithFallThrough(CaseDest, CaseCnt); + if (SwitchWeights) + SwitchWeights->push_back(CaseCnt.getCount()); SwitchInsn->addCase(CaseVal, CaseDest); // Recursively emitting the statement is acceptable, but is not wonderful for @@ -1012,10 +1253,19 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) { const CaseStmt *NextCase = dyn_cast<CaseStmt>(S.getSubStmt()); // Otherwise, iteratively add consecutive cases to this switch stmt. 
- while (NextCase && NextCase->getRHS() == 0) { + while (NextCase && NextCase->getRHS() == nullptr) { CurCase = NextCase; - llvm::ConstantInt *CaseVal = + llvm::ConstantInt *CaseVal = Builder.getInt(CurCase->getLHS()->EvaluateKnownConstInt(getContext())); + + CaseCnt = getPGORegionCounter(NextCase); + if (SwitchWeights) + SwitchWeights->push_back(CaseCnt.getCount()); + if (CGM.getCodeGenOpts().ProfileInstrGenerate) { + CaseDest = createBasicBlock("sw.bb"); + EmitBlockWithFallThrough(CaseDest, CaseCnt); + } + SwitchInsn->addCase(CaseVal, CaseDest); NextCase = dyn_cast<CaseStmt>(CurCase->getSubStmt()); } @@ -1028,7 +1278,10 @@ void CodeGenFunction::EmitDefaultStmt(const DefaultStmt &S) { llvm::BasicBlock *DefaultBlock = SwitchInsn->getDefaultDest(); assert(DefaultBlock->empty() && "EmitDefaultStmt: Default block already defined?"); - EmitBlock(DefaultBlock); + + RegionCounter Cnt = getPGORegionCounter(&S); + EmitBlockWithFallThrough(DefaultBlock, Cnt); + EmitStmt(S.getSubStmt()); } @@ -1061,7 +1314,7 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, bool &FoundCase, SmallVectorImpl<const Stmt*> &ResultStmts) { // If this is a null statement, just succeed. - if (S == 0) + if (!S) return Case ? CSFC_Success : CSFC_FallThrough; // If this is the switchcase (case 4: or default) that we're looking for, then @@ -1069,7 +1322,7 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, if (const SwitchCase *SC = dyn_cast<SwitchCase>(S)) { if (S == Case) { FoundCase = true; - return CollectStatementsForCase(SC->getSubStmt(), 0, FoundCase, + return CollectStatementsForCase(SC->getSubStmt(), nullptr, FoundCase, ResultStmts); } @@ -1080,7 +1333,7 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, // If we are in the live part of the code and we found our break statement, // return a success! - if (Case == 0 && isa<BreakStmt>(S)) + if (!Case && isa<BreakStmt>(S)) return CSFC_Success; // If this is a switch statement, then it might contain the SwitchCase, the @@ -1125,7 +1378,7 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, // statements in the compound statement as candidates for inclusion. assert(FoundCase && "Didn't find case but returned fallthrough?"); // We recursively found Case, so we're not looking for it anymore. - Case = 0; + Case = nullptr; // If we found the case and skipped declarations, we can't do the // optimization. @@ -1139,7 +1392,7 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, // If we have statements in our range, then we know that the statements are // live and need to be added to the set of statements we're tracking. for (; I != E; ++I) { - switch (CollectStatementsForCase(*I, 0, FoundCase, ResultStmts)) { + switch (CollectStatementsForCase(*I, nullptr, FoundCase, ResultStmts)) { case CSFC_Failure: return CSFC_Failure; case CSFC_FallThrough: // A fallthrough result means that the statement was simple and just @@ -1185,11 +1438,12 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, static bool FindCaseStatementsForValue(const SwitchStmt &S, const llvm::APSInt &ConstantCondValue, SmallVectorImpl<const Stmt*> &ResultStmts, - ASTContext &C) { + ASTContext &C, + const SwitchCase *&ResultCase) { // First step, find the switch case that is being branched to. We can do this // efficiently by scanning the SwitchCase list. 
const SwitchCase *Case = S.getSwitchCaseList(); - const DefaultStmt *DefaultCase = 0; + const DefaultStmt *DefaultCase = nullptr; for (; Case; Case = Case->getNextSwitchCase()) { // It's either a default or case. Just remember the default statement in @@ -1211,10 +1465,10 @@ static bool FindCaseStatementsForValue(const SwitchStmt &S, // If we didn't find a matching case, we use a default if it exists, or we // elide the whole switch body! - if (Case == 0) { + if (!Case) { // It is safe to elide the body of the switch if it doesn't contain labels // etc. If it is safe, return successfully with an empty ResultStmts list. - if (DefaultCase == 0) + if (!DefaultCase) return !CodeGenFunction::ContainsLabel(&S); Case = DefaultCase; } @@ -1228,21 +1482,16 @@ static bool FindCaseStatementsForValue(const SwitchStmt &S, // while (1) { // case 4: ... bool FoundCase = false; + ResultCase = Case; return CollectStatementsForCase(S.getBody(), Case, FoundCase, ResultStmts) != CSFC_Failure && FoundCase; } void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { - JumpDest SwitchExit = getJumpDestInCurrentScope("sw.epilog"); - - RunCleanupsScope ConditionScope(*this); - - if (S.getConditionVariable()) - EmitAutoVarDecl(*S.getConditionVariable()); - // Handle nested switch statements. llvm::SwitchInst *SavedSwitchInsn = SwitchInsn; + SmallVector<uint64_t, 16> *SavedSwitchWeights = SwitchWeights; llvm::BasicBlock *SavedCRBlock = CaseRangeBlock; // See if we can constant fold the condition of the switch and therefore only @@ -1250,19 +1499,31 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { llvm::APSInt ConstantCondValue; if (ConstantFoldsToSimpleInteger(S.getCond(), ConstantCondValue)) { SmallVector<const Stmt*, 4> CaseStmts; + const SwitchCase *Case = nullptr; if (FindCaseStatementsForValue(S, ConstantCondValue, CaseStmts, - getContext())) { + getContext(), Case)) { + if (Case) { + RegionCounter CaseCnt = getPGORegionCounter(Case); + CaseCnt.beginRegion(Builder); + } RunCleanupsScope ExecutedScope(*this); + // Emit the condition variable if needed inside the entire cleanup scope + // used by this special case for constant folded switches. + if (S.getConditionVariable()) + EmitAutoVarDecl(*S.getConditionVariable()); + // At this point, we are no longer "within" a switch instance, so // we can temporarily enforce this to ensure that any embedded case // statements are not emitted. - SwitchInsn = 0; + SwitchInsn = nullptr; // Okay, we can dead code eliminate everything except this case. Emit the // specified series of statements and we're good. for (unsigned i = 0, e = CaseStmts.size(); i != e; ++i) EmitStmt(CaseStmts[i]); + RegionCounter ExitCnt = getPGORegionCounter(&S); + ExitCnt.beginRegion(Builder); // Now we want to restore the saved switch instance so that nested // switches continue to function properly @@ -1272,6 +1533,11 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { } } + JumpDest SwitchExit = getJumpDestInCurrentScope("sw.epilog"); + + RunCleanupsScope ConditionScope(*this); + if (S.getConditionVariable()) + EmitAutoVarDecl(*S.getConditionVariable()); llvm::Value *CondV = EmitScalarExpr(S.getCond()); // Create basic block to hold stuff that comes after switch @@ -1280,12 +1546,29 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { // failure. llvm::BasicBlock *DefaultBlock = createBasicBlock("sw.default"); SwitchInsn = Builder.CreateSwitch(CondV, DefaultBlock); + if (PGO.haveRegionCounts()) { + // Walk the SwitchCase list to find how many there are. 
+ uint64_t DefaultCount = 0; + unsigned NumCases = 0; + for (const SwitchCase *Case = S.getSwitchCaseList(); + Case; + Case = Case->getNextSwitchCase()) { + if (isa<DefaultStmt>(Case)) + DefaultCount = getPGORegionCounter(Case).getCount(); + NumCases += 1; + } + SwitchWeights = new SmallVector<uint64_t, 16>(); + SwitchWeights->reserve(NumCases); + // The default needs to be first. We store the edge count, so we already + // know the right weight. + SwitchWeights->push_back(DefaultCount); + } CaseRangeBlock = DefaultBlock; // Clear the insertion point to indicate we are in unreachable code. Builder.ClearInsertionPoint(); - // All break statements jump to NextBlock. If BreakContinueStack is non empty + // All break statements jump to NextBlock. If BreakContinueStack is non-empty // then reuse last ContinueBlock. JumpDest OuterContinue; if (!BreakContinueStack.empty()) @@ -1320,14 +1603,26 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { // Emit continuation. EmitBlock(SwitchExit.getBlock(), true); - + RegionCounter ExitCnt = getPGORegionCounter(&S); + ExitCnt.beginRegion(Builder); + + if (SwitchWeights) { + assert(SwitchWeights->size() == 1 + SwitchInsn->getNumCases() && + "switch weights do not match switch cases"); + // If there's only one jump destination there's no sense weighting it. + if (SwitchWeights->size() > 1) + SwitchInsn->setMetadata(llvm::LLVMContext::MD_prof, + PGO.createBranchWeights(*SwitchWeights)); + delete SwitchWeights; + } SwitchInsn = SavedSwitchInsn; + SwitchWeights = SavedSwitchWeights; CaseRangeBlock = SavedCRBlock; } static std::string SimplifyConstraint(const char *Constraint, const TargetInfo &Target, - SmallVectorImpl<TargetInfo::ConstraintInfo> *OutCons=0) { + SmallVectorImpl<TargetInfo::ConstraintInfo> *OutCons=nullptr) { std::string Result; while (*Constraint) { @@ -1493,7 +1788,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { Name = GAS->getOutputName(i); TargetInfo::ConstraintInfo Info(S.getOutputConstraint(i), Name); bool IsValid = getTarget().validateOutputConstraint(Info); (void)IsValid; - assert(IsValid && "Failed to parse output constraint"); + assert(IsValid && "Failed to parse output constraint"); OutputConstraintInfos.push_back(Info); } @@ -1791,20 +2086,32 @@ static LValue InitCapturedStruct(CodeGenFunction &CGF, const CapturedStmt &S) { return SlotLV; } +static void InitVLACaptures(CodeGenFunction &CGF, const CapturedStmt &S) { + for (auto &C : S.captures()) { + if (C.capturesVariable()) { + QualType QTy; + auto VD = C.getCapturedVar(); + if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(VD)) + QTy = PVD->getOriginalType(); + else + QTy = VD->getType(); + if (QTy->isVariablyModifiedType()) { + CGF.EmitVariablyModifiedType(QTy); + } + } + } +} + /// Generate an outlined function for the body of a CapturedStmt, store any /// captured variables into the captured struct, and call the outlined function. llvm::Function * CodeGenFunction::EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K) { - const CapturedDecl *CD = S.getCapturedDecl(); - const RecordDecl *RD = S.getCapturedRecordDecl(); - assert(CD->hasBody() && "missing CapturedDecl body"); - LValue CapStruct = InitCapturedStruct(*this, S); // Emit the CapturedDecl CodeGenFunction CGF(CGM, true); CGF.CapturedStmtInfo = new CGCapturedStmtInfo(S, K); - llvm::Function *F = CGF.GenerateCapturedStmtFunction(CD, RD, S.getLocStart()); + llvm::Function *F = CGF.GenerateCapturedStmtFunction(S); delete CGF.CapturedStmtInfo; // Emit call to the helper function. 
@@ -1813,13 +2120,21 @@ CodeGenFunction::EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K) { return F; } +llvm::Value * +CodeGenFunction::GenerateCapturedStmtArgument(const CapturedStmt &S) { + LValue CapStruct = InitCapturedStruct(*this, S); + return CapStruct.getAddress(); +} + /// Creates the outlined function for a CapturedStmt. llvm::Function * -CodeGenFunction::GenerateCapturedStmtFunction(const CapturedDecl *CD, - const RecordDecl *RD, - SourceLocation Loc) { +CodeGenFunction::GenerateCapturedStmtFunction(const CapturedStmt &S) { assert(CapturedStmtInfo && "CapturedStmtInfo should be set when generating the captured function"); + const CapturedDecl *CD = S.getCapturedDecl(); + const RecordDecl *RD = S.getCapturedRecordDecl(); + SourceLocation Loc = S.getLocStart(); + assert(CD->hasBody() && "missing CapturedDecl body"); // Build the argument list. ASTContext &Ctx = CGM.getContext(); @@ -1829,8 +2144,8 @@ CodeGenFunction::GenerateCapturedStmtFunction(const CapturedDecl *CD, // Create the function declaration. FunctionType::ExtInfo ExtInfo; const CGFunctionInfo &FuncInfo = - CGM.getTypes().arrangeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo, - /*IsVariadic=*/false); + CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo, + /*IsVariadic=*/false); llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); llvm::Function *F = @@ -1839,13 +2154,17 @@ CodeGenFunction::GenerateCapturedStmtFunction(const CapturedDecl *CD, CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); // Generate the function. - StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getBody()->getLocStart()); - + StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, + CD->getLocation(), + CD->getBody()->getLocStart()); // Set the context parameter in CapturedStmtInfo. llvm::Value *DeclPtr = LocalDeclMap[CD->getContextParam()]; assert(DeclPtr && "missing context parameter for CapturedStmt"); CapturedStmtInfo->setContextValue(Builder.CreateLoad(DeclPtr)); + // Initialize variable-length arrays. + InitVLACaptures(*this, S); + // If 'this' is captured, load it into CXXThisValue. if (CapturedStmtInfo->isCXXThisExprCaptured()) { FieldDecl *FD = CapturedStmtInfo->getThisFieldDecl(); @@ -1855,8 +2174,11 @@ CodeGenFunction::GenerateCapturedStmtFunction(const CapturedDecl *CD, CXXThisValue = EmitLoadOfLValue(ThisLValue, Loc).getScalarVal(); } + PGO.assignRegionCounters(CD, F); CapturedStmtInfo->EmitBody(*this, CD->getBody()); FinishFunction(CD->getBodyRBrace()); + PGO.emitInstrumentationData(); + PGO.destroyRegionCounters(); return F; } diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp new file mode 100644 index 000000000000..867f41592dd9 --- /dev/null +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -0,0 +1,130 @@ +//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit OpenMP nodes as LLVM code. 
+// +//===----------------------------------------------------------------------===// + +#include "CGOpenMPRuntime.h" +#include "CodeGenFunction.h" +#include "CodeGenModule.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/StmtOpenMP.h" +using namespace clang; +using namespace CodeGen; + +//===----------------------------------------------------------------------===// +// OpenMP Directive Emission +//===----------------------------------------------------------------------===// + +void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { + const CapturedStmt *CS = cast<CapturedStmt>(S.getAssociatedStmt()); + llvm::Value *CapturedStruct = GenerateCapturedStmtArgument(*CS); + + llvm::Value *OutlinedFn; + { + CodeGenFunction CGF(CGM, true); + CGCapturedStmtInfo CGInfo(*CS, CS->getCapturedRegionKind()); + CGF.CapturedStmtInfo = &CGInfo; + OutlinedFn = CGF.GenerateCapturedStmtFunction(*CS); + } + + // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/) + llvm::Value *Args[] = { + CGM.getOpenMPRuntime().EmitOpenMPUpdateLocation(*this, S.getLocStart()), + Builder.getInt32(1), // Number of arguments after 'microtask' argument + // (there is only one additional argument - 'context') + Builder.CreateBitCast(OutlinedFn, + CGM.getOpenMPRuntime().getKmpc_MicroPointerTy()), + EmitCastToVoidPtr(CapturedStruct)}; + llvm::Constant *RTLFn = CGM.getOpenMPRuntime().CreateRuntimeFunction( + CGOpenMPRuntime::OMPRTL__kmpc_fork_call); + EmitRuntimeCall(RTLFn, Args); +} + +void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { + const CapturedStmt *CS = cast<CapturedStmt>(S.getAssociatedStmt()); + const Stmt *Body = CS->getCapturedStmt(); + LoopStack.setParallel(); + LoopStack.setVectorizerEnable(true); + for (auto C : S.clauses()) { + switch (C->getClauseKind()) { + case OMPC_safelen: { + RValue Len = EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(), + AggValueSlot::ignored(), true); + llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); + LoopStack.setVectorizerWidth(Val->getZExtValue()); + // In presence of finite 'safelen', it may be unsafe to mark all + // the memory instructions parallel, because loop-carried + // dependences of 'safelen' iterations are possible. 
+ LoopStack.setParallel(false); + break; + } + default: + // Not handled yet + ; + } + } + EmitStmt(Body); +} + +void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &) { + llvm_unreachable("CodeGen for 'omp for' is not supported yet."); +} + +void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &) { + llvm_unreachable("CodeGen for 'omp sections' is not supported yet."); +} + +void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &) { + llvm_unreachable("CodeGen for 'omp section' is not supported yet."); +} + +void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &) { + llvm_unreachable("CodeGen for 'omp single' is not supported yet."); +} + +void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &) { + llvm_unreachable("CodeGen for 'omp master' is not supported yet."); +} + +void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &) { + llvm_unreachable("CodeGen for 'omp critical' is not supported yet."); +} + +void +CodeGenFunction::EmitOMPParallelForDirective(const OMPParallelForDirective &) { + llvm_unreachable("CodeGen for 'omp parallel for' is not supported yet."); +} + +void CodeGenFunction::EmitOMPParallelSectionsDirective( + const OMPParallelSectionsDirective &) { + llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet."); +} + +void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &) { + llvm_unreachable("CodeGen for 'omp task' is not supported yet."); +} + +void CodeGenFunction::EmitOMPTaskyieldDirective(const OMPTaskyieldDirective &) { + llvm_unreachable("CodeGen for 'omp taskyield' is not supported yet."); +} + +void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &) { + llvm_unreachable("CodeGen for 'omp barrier' is not supported yet."); +} + +void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) { + llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet."); +} + +void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &) { + llvm_unreachable("CodeGen for 'omp flush' is not supported yet."); +} + diff --git a/lib/CodeGen/CGVTT.cpp b/lib/CodeGen/CGVTT.cpp index bfff47058897..bd280ea14e8d 100644 --- a/lib/CodeGen/CGVTT.cpp +++ b/lib/CodeGen/CGVTT.cpp @@ -66,7 +66,8 @@ CodeGenVTables::EmitVTTDefinition(llvm::GlobalVariable *VTT, if (VTTVT.getBase() == RD) { // Just get the address point for the regular vtable. AddressPoint = - ItaniumVTContext.getVTableLayout(RD).getAddressPoint(i->VTableBase); + getItaniumVTableContext().getVTableLayout(RD).getAddressPoint( + i->VTableBase); assert(AddressPoint != 0 && "Did not find vtable address point!"); } else { AddressPoint = VTableAddressPoints[i->VTableIndex].lookup(i->VTableBase); @@ -94,7 +95,7 @@ CodeGenVTables::EmitVTTDefinition(llvm::GlobalVariable *VTT, VTT->setLinkage(Linkage); // Set the right visibility. - CGM.setTypeVisibility(VTT, RD, CodeGenModule::TVK_ForVTT); + CGM.setGlobalVisibility(VTT, RD); } llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTT(const CXXRecordDecl *RD) { diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp index f28d9b67a8f3..0df2c43d11b5 100644 --- a/lib/CodeGen/CGVTables.cpp +++ b/lib/CodeGen/CGVTables.cpp @@ -30,14 +30,7 @@ using namespace clang; using namespace CodeGen; CodeGenVTables::CodeGenVTables(CodeGenModule &CGM) - : CGM(CGM), ItaniumVTContext(CGM.getContext()) { - if (CGM.getTarget().getCXXABI().isMicrosoft()) { - // FIXME: Eventually, we should only have one of V*TContexts available. 
- // Today we use both in the Microsoft ABI as MicrosoftVFTableContext - // is not completely supported in CodeGen yet. - MicrosoftVTContext.reset(new MicrosoftVTableContext(CGM.getContext())); - } -} + : CGM(CGM), VTContext(CGM.getContext().getVTableContext()) {} llvm::Constant *CodeGenModule::GetAddrOfThunk(GlobalDecl GD, const ThunkInfo &Thunk) { @@ -54,54 +47,13 @@ llvm::Constant *CodeGenModule::GetAddrOfThunk(GlobalDecl GD, Out.flush(); llvm::Type *Ty = getTypes().GetFunctionTypeForVTable(GD); - return GetOrCreateLLVMFunction(Name, Ty, GD, /*ForVTable=*/true); + return GetOrCreateLLVMFunction(Name, Ty, GD, /*ForVTable=*/true, + /*DontDefer*/ true); } static void setThunkVisibility(CodeGenModule &CGM, const CXXMethodDecl *MD, const ThunkInfo &Thunk, llvm::Function *Fn) { CGM.setGlobalVisibility(Fn, MD); - - if (!CGM.getCodeGenOpts().HiddenWeakVTables) - return; - - // If the thunk has weak/linkonce linkage, but the function must be - // emitted in every translation unit that references it, then we can - // emit its thunks with hidden visibility, since its thunks must be - // emitted when the function is. - - // This follows CodeGenModule::setTypeVisibility; see the comments - // there for explanation. - - if ((Fn->getLinkage() != llvm::GlobalVariable::LinkOnceODRLinkage && - Fn->getLinkage() != llvm::GlobalVariable::WeakODRLinkage) || - Fn->getVisibility() != llvm::GlobalVariable::DefaultVisibility) - return; - - if (MD->getExplicitVisibility(ValueDecl::VisibilityForValue)) - return; - - switch (MD->getTemplateSpecializationKind()) { - case TSK_ExplicitInstantiationDefinition: - case TSK_ExplicitInstantiationDeclaration: - return; - - case TSK_Undeclared: - break; - - case TSK_ExplicitSpecialization: - case TSK_ImplicitInstantiation: - return; - break; - } - - // If there's an explicit definition, and that definition is - // out-of-line, then we can't assume that all users will have a - // definition to emit. - const FunctionDecl *Def = 0; - if (MD->hasBody(Def) && Def->isOutOfLine()) - return; - - Fn->setVisibility(llvm::GlobalValue::HiddenVisibility); } #ifndef NDEBUG @@ -119,11 +71,11 @@ static RValue PerformReturnAdjustment(CodeGenFunction &CGF, const ThunkInfo &Thunk) { // Emit the return adjustment. bool NullCheckValue = !ResultType->isReferenceType(); - - llvm::BasicBlock *AdjustNull = 0; - llvm::BasicBlock *AdjustNotNull = 0; - llvm::BasicBlock *AdjustEnd = 0; - + + llvm::BasicBlock *AdjustNull = nullptr; + llvm::BasicBlock *AdjustNotNull = nullptr; + llvm::BasicBlock *AdjustEnd = nullptr; + llvm::Value *ReturnValue = RV.getScalarVal(); if (NullCheckValue) { @@ -177,7 +129,7 @@ void CodeGenFunction::GenerateVarArgsThunk( GlobalDecl GD, const ThunkInfo &Thunk) { const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>(); - QualType ResultType = FPT->getResultType(); + QualType ResultType = FPT->getReturnType(); // Get the original function assert(FnInfo.isVariadic()); @@ -207,7 +159,7 @@ void CodeGenFunction::GenerateVarArgsThunk( // with "this". 
llvm::Value *ThisPtr = &*AI; llvm::BasicBlock *EntryBB = Fn->begin(); - llvm::Instruction *ThisStore = 0; + llvm::Instruction *ThisStore = nullptr; for (llvm::BasicBlock::iterator I = EntryBB->begin(), E = EntryBB->end(); I != E; I++) { if (isa<llvm::StoreInst>(I) && I->getOperand(0) == ThisPtr) { @@ -248,11 +200,11 @@ void CodeGenFunction::StartThunk(llvm::Function *Fn, GlobalDecl GD, QualType ThisType = MD->getThisType(getContext()); const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>(); QualType ResultType = - CGM.getCXXABI().HasThisReturn(GD) ? ThisType : FPT->getResultType(); + CGM.getCXXABI().HasThisReturn(GD) ? ThisType : FPT->getReturnType(); FunctionArgList FunctionArgs; // Create the implicit 'this' parameter declaration. - CGM.getCXXABI().BuildInstanceFunctionParams(*this, ResultType, FunctionArgs); + CGM.getCXXABI().buildThisParam(*this, FunctionArgs); // Add the rest of the parameters. for (FunctionDecl::param_const_iterator I = MD->param_begin(), @@ -260,9 +212,12 @@ void CodeGenFunction::StartThunk(llvm::Function *Fn, GlobalDecl GD, I != E; ++I) FunctionArgs.push_back(*I); + if (isa<CXXDestructorDecl>(MD)) + CGM.getCXXABI().addImplicitStructorParams(*this, ResultType, FunctionArgs); + // Start defining the function. StartFunction(GlobalDecl(), ResultType, Fn, FnInfo, FunctionArgs, - SourceLocation()); + MD->getLocation(), SourceLocation()); // Since we didn't pass a GlobalDecl to StartFunction, do this ourselves. CGM.getCXXABI().EmitInstanceFunctionProlog(*this); @@ -316,7 +271,7 @@ void CodeGenFunction::EmitCallAndReturnForThunk(GlobalDecl GD, // Determine whether we have a return value slot to use. QualType ResultType = - CGM.getCXXABI().HasThisReturn(GD) ? ThisType : FPT->getResultType(); + CGM.getCXXABI().HasThisReturn(GD) ? ThisType : FPT->getReturnType(); ReturnValueSlot Slot; if (!ResultType->isVoidType() && CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect && @@ -366,27 +321,30 @@ void CodeGenVTables::emitThunk(GlobalDecl GD, const ThunkInfo &Thunk, const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeGlobalDeclaration(GD); // FIXME: re-use FnInfo in this computation. - llvm::Constant *Entry = CGM.GetAddrOfThunk(GD, Thunk); - + llvm::Constant *C = CGM.GetAddrOfThunk(GD, Thunk); + llvm::GlobalValue *Entry; + // Strip off a bitcast if we got one back. - if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(Entry)) { + if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(C)) { assert(CE->getOpcode() == llvm::Instruction::BitCast); - Entry = CE->getOperand(0); + Entry = cast<llvm::GlobalValue>(CE->getOperand(0)); + } else { + Entry = cast<llvm::GlobalValue>(C); } - + // There's already a declaration with the same name, check if it has the same // type or if we need to replace it. - if (cast<llvm::GlobalValue>(Entry)->getType()->getElementType() != + if (Entry->getType()->getElementType() != CGM.getTypes().GetFunctionTypeForVTable(GD)) { - llvm::GlobalValue *OldThunkFn = cast<llvm::GlobalValue>(Entry); - + llvm::GlobalValue *OldThunkFn = Entry; + // If the types mismatch then we have to rewrite the definition. assert(OldThunkFn->isDeclaration() && "Shouldn't replace non-declaration"); // Remove the name from the old thunk function and get a new thunk. OldThunkFn->setName(StringRef()); - Entry = CGM.GetAddrOfThunk(GD, Thunk); + Entry = cast<llvm::GlobalValue>(CGM.GetAddrOfThunk(GD, Thunk)); // If needed, replace the old thunk with a bitcast. 
if (!OldThunkFn->use_empty()) { @@ -424,12 +382,14 @@ void CodeGenVTables::emitThunk(GlobalDecl GD, const ThunkInfo &Thunk, // FIXME: Do something better here; GenerateVarArgsThunk is extremely ugly. if (!UseAvailableExternallyLinkage) { CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD, Thunk); - CGM.getCXXABI().setThunkLinkage(ThunkFn, ForVTable); + CGM.getCXXABI().setThunkLinkage(ThunkFn, ForVTable, GD, + !Thunk.Return.isEmpty()); } } else { // Normal thunk body generation. CodeGenFunction(CGM).GenerateThunk(ThunkFn, FnInfo, GD, Thunk); - CGM.getCXXABI().setThunkLinkage(ThunkFn, ForVTable); + CGM.getCXXABI().setThunkLinkage(ThunkFn, ForVTable, GD, + !Thunk.Return.isEmpty()); } } @@ -461,12 +421,8 @@ void CodeGenVTables::EmitThunks(GlobalDecl GD) if (isa<CXXDestructorDecl>(MD) && GD.getDtorType() == Dtor_Base) return; - const VTableContextBase::ThunkInfoVectorTy *ThunkInfoVector; - if (MicrosoftVTContext.isValid()) { - ThunkInfoVector = MicrosoftVTContext->getThunkInfo(GD); - } else { - ThunkInfoVector = ItaniumVTContext.getThunkInfo(GD); - } + const VTableContextBase::ThunkInfoVectorTy *ThunkInfoVector = + VTContext->getThunkInfo(GD); if (!ThunkInfoVector) return; @@ -475,12 +431,10 @@ void CodeGenVTables::EmitThunks(GlobalDecl GD) emitThunk(GD, (*ThunkInfoVector)[I], /*ForVTable=*/false); } -llvm::Constant * -CodeGenVTables::CreateVTableInitializer(const CXXRecordDecl *RD, - const VTableComponent *Components, - unsigned NumComponents, - const VTableLayout::VTableThunkTy *VTableThunks, - unsigned NumVTableThunks) { +llvm::Constant *CodeGenVTables::CreateVTableInitializer( + const CXXRecordDecl *RD, const VTableComponent *Components, + unsigned NumComponents, const VTableLayout::VTableThunkTy *VTableThunks, + unsigned NumVTableThunks, llvm::Constant *RTTI) { SmallVector<llvm::Constant *, 64> Inits; llvm::Type *Int8PtrTy = CGM.Int8PtrTy; @@ -488,17 +442,14 @@ CodeGenVTables::CreateVTableInitializer(const CXXRecordDecl *RD, llvm::Type *PtrDiffTy = CGM.getTypes().ConvertType(CGM.getContext().getPointerDiffType()); - QualType ClassType = CGM.getContext().getTagDeclType(RD); - llvm::Constant *RTTI = CGM.GetAddrOfRTTIDescriptor(ClassType); - unsigned NextVTableThunkIndex = 0; - - llvm::Constant *PureVirtualFn = 0, *DeletedVirtualFn = 0; + + llvm::Constant *PureVirtualFn = nullptr, *DeletedVirtualFn = nullptr; for (unsigned I = 0; I != NumComponents; ++I) { VTableComponent Component = Components[I]; - llvm::Constant *Init = 0; + llvm::Constant *Init = nullptr; switch (Component.getKind()) { case VTableComponent::CK_VCallOffset: @@ -603,8 +554,8 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, if (CGDebugInfo *DI = CGM.getModuleDebugInfo()) DI->completeClassData(Base.getBase()); - OwningPtr<VTableLayout> VTLayout( - ItaniumVTContext.createConstructionVTableLayout( + std::unique_ptr<VTableLayout> VTLayout( + getItaniumVTableContext().createConstructionVTableLayout( Base.getBase(), Base.getBaseOffset(), BaseIsVirtual, RD)); // Add the address points. @@ -633,18 +584,19 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, // Create the variable that will hold the construction vtable. llvm::GlobalVariable *VTable = CGM.CreateOrReplaceCXXRuntimeVariable(Name, ArrayType, Linkage); - CGM.setTypeVisibility(VTable, RD, CodeGenModule::TVK_ForConstructionVTable); + CGM.setGlobalVisibility(VTable, RD); // V-tables are always unnamed_addr. 
VTable->setUnnamedAddr(true); + llvm::Constant *RTTI = CGM.GetAddrOfRTTIDescriptor( + CGM.getContext().getTagDeclType(Base.getBase())); + // Create and set the initializer. - llvm::Constant *Init = - CreateVTableInitializer(Base.getBase(), - VTLayout->vtable_component_begin(), - VTLayout->getNumVTableComponents(), - VTLayout->vtable_thunk_begin(), - VTLayout->getNumVTableThunks()); + llvm::Constant *Init = CreateVTableInitializer( + Base.getBase(), VTLayout->vtable_component_begin(), + VTLayout->getNumVTableComponents(), VTLayout->vtable_thunk_begin(), + VTLayout->getNumVTableThunks(), RTTI); VTable->setInitializer(Init); return VTable; @@ -663,7 +615,7 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) { if (const CXXMethodDecl *keyFunction = Context.getCurrentKeyFunction(RD)) { // If this class has a key function, use that to determine the // linkage of the vtable. - const FunctionDecl *def = 0; + const FunctionDecl *def = nullptr; if (keyFunction->hasBody(def)) keyFunction = cast<CXXMethodDecl>(def); @@ -697,18 +649,31 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) { // internal linkage. if (Context.getLangOpts().AppleKext) return llvm::Function::InternalLinkage; - + + llvm::GlobalVariable::LinkageTypes DiscardableODRLinkage = + llvm::GlobalValue::LinkOnceODRLinkage; + llvm::GlobalVariable::LinkageTypes NonDiscardableODRLinkage = + llvm::GlobalValue::WeakODRLinkage; + if (RD->hasAttr<DLLExportAttr>()) { + // Cannot discard exported vtables. + DiscardableODRLinkage = NonDiscardableODRLinkage; + } else if (RD->hasAttr<DLLImportAttr>()) { + // Imported vtables are available externally. + DiscardableODRLinkage = llvm::GlobalVariable::AvailableExternallyLinkage; + NonDiscardableODRLinkage = llvm::GlobalVariable::AvailableExternallyLinkage; + } + switch (RD->getTemplateSpecializationKind()) { case TSK_Undeclared: case TSK_ExplicitSpecialization: case TSK_ImplicitInstantiation: - return llvm::GlobalVariable::LinkOnceODRLinkage; + return DiscardableODRLinkage; case TSK_ExplicitInstantiationDeclaration: llvm_unreachable("Should not have been asked to emit this"); case TSK_ExplicitInstantiationDefinition: - return llvm::GlobalVariable::WeakODRLinkage; + return NonDiscardableODRLinkage; } llvm_unreachable("Invalid TemplateSpecializationKind!"); @@ -752,7 +717,7 @@ CodeGenVTables::GenerateClassData(const CXXRecordDecl *RD) { /// strongly elsewhere. Otherwise, we'd just like to avoid emitting /// v-tables when unnecessary. bool CodeGenVTables::isVTableExternal(const CXXRecordDecl *RD) { - assert(RD->isDynamicClass() && "Non dynamic classes have no VTable."); + assert(RD->isDynamicClass() && "Non-dynamic classes have no VTable."); // If we have an explicit instantiation declaration (and not a // definition), the v-table is defined elsewhere. diff --git a/lib/CodeGen/CGVTables.h b/lib/CodeGen/CGVTables.h index e8cd55eed80a..69cf079567e3 100644 --- a/lib/CodeGen/CGVTables.h +++ b/lib/CodeGen/CGVTables.h @@ -31,11 +31,8 @@ namespace CodeGen { class CodeGenVTables { CodeGenModule &CGM; - // FIXME: Consider moving ItaniumVTContext and MicrosoftVTContext into - // respective CXXABI classes? - ItaniumVTableContext ItaniumVTContext; - OwningPtr<MicrosoftVTableContext> MicrosoftVTContext; - + VTableContextBase *VTContext; + /// VTableAddressPointsMapTy - Address points for a single vtable. typedef llvm::DenseMap<BaseSubobject, uint64_t> VTableAddressPointsMapTy; @@ -64,18 +61,19 @@ public: /// decl. 
/// \param Components - The vtable components; this is really an array of /// VTableComponents. - llvm::Constant *CreateVTableInitializer(const CXXRecordDecl *RD, - const VTableComponent *Components, - unsigned NumComponents, - const VTableLayout::VTableThunkTy *VTableThunks, - unsigned NumVTableThunks); + llvm::Constant *CreateVTableInitializer( + const CXXRecordDecl *RD, const VTableComponent *Components, + unsigned NumComponents, const VTableLayout::VTableThunkTy *VTableThunks, + unsigned NumVTableThunks, llvm::Constant *RTTI); CodeGenVTables(CodeGenModule &CGM); - ItaniumVTableContext &getItaniumVTableContext() { return ItaniumVTContext; } + ItaniumVTableContext &getItaniumVTableContext() { + return *cast<ItaniumVTableContext>(VTContext); + } MicrosoftVTableContext &getMicrosoftVTableContext() { - return *MicrosoftVTContext.get(); + return *cast<MicrosoftVTableContext>(VTContext); } /// getSubVTTIndex - Return the index of the sub-VTT for the base class of the @@ -100,7 +98,7 @@ public: VTableAddressPointsMapTy& AddressPoints); - /// GetAddrOfVTable - Get the address of the VTT for the given record decl. + /// GetAddrOfVTT - Get the address of the VTT for the given record decl. llvm::GlobalVariable *GetAddrOfVTT(const CXXRecordDecl *RD); /// EmitVTTDefinition - Emit the definition of the given vtable. diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h index da2a03437d3d..956f3247bd82 100644 --- a/lib/CodeGen/CGValue.h +++ b/lib/CodeGen/CGValue.h @@ -110,7 +110,8 @@ class LValue { Simple, // This is a normal l-value, use getAddress(). VectorElt, // This is a vector element l-value (V[i]), use getVector* BitField, // This is a bitfield l-value, use getBitfield*. - ExtVectorElt // This is an extended vector subset, use getExtVectorComp + ExtVectorElt, // This is an extended vector subset, use getExtVectorComp + GlobalReg // This is a register l-value, use getGlobalReg() } LVType; llvm::Value *V; @@ -168,7 +169,7 @@ class LValue { private: void Initialize(QualType Type, Qualifiers Quals, CharUnits Alignment, - llvm::MDNode *TBAAInfo = 0) { + llvm::MDNode *TBAAInfo = nullptr) { this->Type = Type; this->Quals = Quals; this->Alignment = Alignment.getQuantity(); @@ -179,7 +180,7 @@ private: this->Ivar = this->ObjIsArray = this->NonGC = this->GlobalObjCRef = false; this->ImpreciseLifetime = false; this->ThreadLocalRef = false; - this->BaseIvarExp = 0; + this->BaseIvarExp = nullptr; // Initialize fields for TBAA. 
this->TBAABaseType = Type; @@ -192,6 +193,7 @@ public: bool isVectorElt() const { return LVType == VectorElt; } bool isBitField() const { return LVType == BitField; } bool isExtVectorElt() const { return LVType == ExtVectorElt; } + bool isGlobalReg() const { return LVType == GlobalReg; } bool isVolatileQualified() const { return Quals.hasVolatile(); } bool isRestrictQualified() const { return Quals.hasRestrict(); } @@ -286,9 +288,12 @@ public: return *BitFieldInfo; } + // global register lvalue + llvm::Value *getGlobalReg() const { assert(isGlobalReg()); return V; } + static LValue MakeAddr(llvm::Value *address, QualType type, CharUnits alignment, ASTContext &Context, - llvm::MDNode *TBAAInfo = 0) { + llvm::MDNode *TBAAInfo = nullptr) { Qualifiers qs = type.getQualifiers(); qs.setObjCGCAttr(Context.getObjCGCAttrKind(type)); @@ -336,6 +341,16 @@ public: return R; } + static LValue MakeGlobalReg(llvm::Value *Reg, + QualType type, + CharUnits Alignment) { + LValue R; + R.LVType = GlobalReg; + R.V = Reg; + R.Initialize(type, type.getQualifiers(), Alignment); + return R; + } + RValue asAggregateRValue() const { // FIMXE: Alignment return RValue::getAggregate(getAddress(), isVolatileQualified()); @@ -390,7 +405,7 @@ public: /// ignored - Returns an aggregate value slot indicating that the /// aggregate value is being ignored. static AggValueSlot ignored() { - return forAddr(0, CharUnits(), Qualifiers(), IsNotDestructed, + return forAddr(nullptr, CharUnits(), Qualifiers(), IsNotDestructed, DoesNotNeedGCBarriers, IsNotAliased); } @@ -460,7 +475,7 @@ public: } bool isIgnored() const { - return Addr == 0; + return Addr == nullptr; } CharUnits getAlignment() const { diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 83dbbf0d3460..00a87d87a290 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -1,12 +1,19 @@ set(LLVM_LINK_COMPONENTS - asmparser - bitreader - bitwriter - irreader - instrumentation - ipo - linker - vectorize + BitReader + BitWriter + Core + IPO + IRReader + InstCombine + Instrumentation + Linker + MC + ObjCARCOpts + ProfileData + ScalarOpts + Support + Target + TransformUtils ) add_clang_library(clangCodeGen @@ -14,12 +21,12 @@ add_clang_library(clangCodeGen CGAtomic.cpp CGBlocks.cpp CGBuiltin.cpp - CGCall.cpp - CGClass.cpp CGCUDANV.cpp CGCUDARuntime.cpp CGCXX.cpp CGCXXABI.cpp + CGCall.cpp + CGClass.cpp CGCleanup.cpp CGDebugInfo.cpp CGDecl.cpp @@ -27,46 +34,40 @@ add_clang_library(clangCodeGen CGException.cpp CGExpr.cpp CGExprAgg.cpp + CGExprCXX.cpp CGExprComplex.cpp CGExprConstant.cpp - CGExprCXX.cpp CGExprScalar.cpp + CGLoopInfo.cpp CGObjC.cpp CGObjCGNU.cpp CGObjCMac.cpp CGObjCRuntime.cpp CGOpenCLRuntime.cpp + CGOpenMPRuntime.cpp CGRecordLayoutBuilder.cpp - CGRTTI.cpp CGStmt.cpp - CGVTables.cpp + CGStmtOpenMP.cpp CGVTT.cpp + CGVTables.cpp CodeGenABITypes.cpp CodeGenAction.cpp CodeGenFunction.cpp CodeGenModule.cpp + CodeGenPGO.cpp CodeGenTBAA.cpp CodeGenTypes.cpp ItaniumCXXABI.cpp MicrosoftCXXABI.cpp - MicrosoftVBTables.cpp ModuleBuilder.cpp + SanitizerBlacklist.cpp TargetInfo.cpp - ) -add_dependencies(clangCodeGen - ClangARMNeon - ClangAttrClasses - ClangAttrList - ClangCommentNodes - ClangDeclNodes - ClangDiagnosticCommon - ClangDiagnosticFrontend - ClangStmtNodes - ) + DEPENDS + intrinsics_gen -target_link_libraries(clangCodeGen - clangBasic + LINK_LIBS clangAST + clangBasic clangFrontend ) diff --git a/lib/CodeGen/CodeGenABITypes.cpp b/lib/CodeGen/CodeGenABITypes.cpp index 18c836cf2f4c..180cd51940ac 100644 --- 
a/lib/CodeGen/CodeGenABITypes.cpp +++ b/lib/CodeGen/CodeGenABITypes.cpp @@ -17,23 +17,23 @@ //===----------------------------------------------------------------------===// #include "clang/CodeGen/CodeGenABITypes.h" - -#include "clang/CodeGen/CGFunctionInfo.h" #include "CodeGenModule.h" +#include "clang/CodeGen/CGFunctionInfo.h" +#include "clang/Frontend/CodeGenOptions.h" using namespace clang; using namespace CodeGen; CodeGenABITypes::CodeGenABITypes(ASTContext &C, - const CodeGenOptions &CodeGenOpts, llvm::Module &M, - const llvm::DataLayout &TD, - DiagnosticsEngine &Diags) - : CGM(new CodeGen::CodeGenModule(C, CodeGenOpts, M, TD, Diags)) { + const llvm::DataLayout &TD) + : CGO(new CodeGenOptions), + CGM(new CodeGen::CodeGenModule(C, *CGO, M, TD, C.getDiagnostics())) { } CodeGenABITypes::~CodeGenABITypes() { + delete CGO; delete CGM; } @@ -60,10 +60,10 @@ CodeGenABITypes::arrangeCXXMethodType(const CXXRecordDecl *RD, } const CGFunctionInfo & -CodeGenABITypes::arrangeLLVMFunctionInfo(CanQualType returnType, - llvm::ArrayRef<CanQualType> argTypes, +CodeGenABITypes::arrangeFreeFunctionCall(CanQualType returnType, + ArrayRef<CanQualType> argTypes, FunctionType::ExtInfo info, RequiredArgs args) { - return CGM->getTypes().arrangeLLVMFunctionInfo(returnType, argTypes, - info, args); + return CGM->getTypes().arrangeLLVMFunctionInfo( + returnType, /*IsInstanceMethod=*/false, argTypes, info, args); } diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp index 3072204c9b66..04d2cd9d53e4 100644 --- a/lib/CodeGen/CodeGenAction.cpp +++ b/lib/CodeGen/CodeGenAction.cpp @@ -11,6 +11,7 @@ #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclGroup.h" +#include "clang/AST/DeclCXX.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" @@ -18,17 +19,20 @@ #include "clang/CodeGen/ModuleBuilder.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendDiagnostic.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IRReader/IRReader.h" -#include "llvm/Linker.h" +#include "llvm/Linker/Linker.h" #include "llvm/Pass.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/Timer.h" +#include <memory> using namespace clang; using namespace llvm; @@ -45,42 +49,33 @@ namespace clang { Timer LLVMIRGeneration; - OwningPtr<CodeGenerator> Gen; + std::unique_ptr<CodeGenerator> Gen; - OwningPtr<llvm::Module> TheModule, LinkModule; + std::unique_ptr<llvm::Module> TheModule, LinkModule; public: BackendConsumer(BackendAction action, DiagnosticsEngine &_Diags, const CodeGenOptions &compopts, const TargetOptions &targetopts, - const LangOptions &langopts, - bool TimePasses, - const std::string &infile, - llvm::Module *LinkModule, - raw_ostream *OS, - LLVMContext &C) : - Diags(_Diags), - Action(action), - CodeGenOpts(compopts), - TargetOpts(targetopts), - LangOpts(langopts), - AsmOutStream(OS), - Context(), - LLVMIRGeneration("LLVM IR Generation Time"), - Gen(CreateLLVMCodeGen(Diags, infile, compopts, targetopts, C)), - LinkModule(LinkModule) - { + const LangOptions &langopts, bool TimePasses, + const std::string &infile, llvm::Module *LinkModule, + raw_ostream *OS, LLVMContext &C) + : Diags(_Diags), 
Action(action), CodeGenOpts(compopts), + TargetOpts(targetopts), LangOpts(langopts), AsmOutStream(OS), + Context(), LLVMIRGeneration("LLVM IR Generation Time"), + Gen(CreateLLVMCodeGen(Diags, infile, compopts, targetopts, C)), + LinkModule(LinkModule) { llvm::TimePassesIsEnabled = TimePasses; } - llvm::Module *takeModule() { return TheModule.take(); } - llvm::Module *takeLinkModule() { return LinkModule.take(); } + llvm::Module *takeModule() { return TheModule.release(); } + llvm::Module *takeLinkModule() { return LinkModule.release(); } - virtual void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) { + void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override { Gen->HandleCXXStaticMemberVarInstantiation(VD); } - virtual void Initialize(ASTContext &Ctx) { + void Initialize(ASTContext &Ctx) override { Context = &Ctx; if (llvm::TimePassesIsEnabled) @@ -94,7 +89,7 @@ namespace clang { LLVMIRGeneration.stopTimer(); } - virtual bool HandleTopLevelDecl(DeclGroupRef D) { + bool HandleTopLevelDecl(DeclGroupRef D) override { PrettyStackTraceDecl CrashInfo(*D.begin(), SourceLocation(), Context->getSourceManager(), "LLVM IR generation of declaration"); @@ -110,7 +105,20 @@ namespace clang { return true; } - virtual void HandleTranslationUnit(ASTContext &C) { + void HandleInlineMethodDefinition(CXXMethodDecl *D) override { + PrettyStackTraceDecl CrashInfo(D, SourceLocation(), + Context->getSourceManager(), + "LLVM IR generation of inline method"); + if (llvm::TimePassesIsEnabled) + LLVMIRGeneration.startTimer(); + + Gen->HandleInlineMethodDefinition(D); + + if (llvm::TimePassesIsEnabled) + LLVMIRGeneration.stopTimer(); + } + + void HandleTranslationUnit(ASTContext &C) override { { PrettyStackTraceString CrashInfo("Per-file LLVM IR generation"); if (llvm::TimePassesIsEnabled) @@ -132,7 +140,7 @@ namespace clang { if (!M) { // The module has been released by IR gen on failures, do not double // free. 
- TheModule.take(); + TheModule.release(); return; } @@ -158,41 +166,49 @@ namespace clang { void *OldContext = Ctx.getInlineAsmDiagnosticContext(); Ctx.setInlineAsmDiagnosticHandler(InlineAsmDiagHandler, this); + LLVMContext::DiagnosticHandlerTy OldDiagnosticHandler = + Ctx.getDiagnosticHandler(); + void *OldDiagnosticContext = Ctx.getDiagnosticContext(); + Ctx.setDiagnosticHandler(DiagnosticHandler, this); + EmitBackendOutput(Diags, CodeGenOpts, TargetOpts, LangOpts, + C.getTargetInfo().getTargetDescription(), TheModule.get(), Action, AsmOutStream); - + Ctx.setInlineAsmDiagnosticHandler(OldHandler, OldContext); + + Ctx.setDiagnosticHandler(OldDiagnosticHandler, OldDiagnosticContext); } - virtual void HandleTagDeclDefinition(TagDecl *D) { + void HandleTagDeclDefinition(TagDecl *D) override { PrettyStackTraceDecl CrashInfo(D, SourceLocation(), Context->getSourceManager(), "LLVM IR generation of declaration"); Gen->HandleTagDeclDefinition(D); } - virtual void HandleTagDeclRequiredDefinition(const TagDecl *D) { + void HandleTagDeclRequiredDefinition(const TagDecl *D) override { Gen->HandleTagDeclRequiredDefinition(D); } - virtual void CompleteTentativeDefinition(VarDecl *D) { + void CompleteTentativeDefinition(VarDecl *D) override { Gen->CompleteTentativeDefinition(D); } - virtual void HandleVTable(CXXRecordDecl *RD, bool DefinitionRequired) { + void HandleVTable(CXXRecordDecl *RD, bool DefinitionRequired) override { Gen->HandleVTable(RD, DefinitionRequired); } - virtual void HandleLinkerOptionPragma(llvm::StringRef Opts) { + void HandleLinkerOptionPragma(llvm::StringRef Opts) override { Gen->HandleLinkerOptionPragma(Opts); } - virtual void HandleDetectMismatch(llvm::StringRef Name, - llvm::StringRef Value) { + void HandleDetectMismatch(llvm::StringRef Name, + llvm::StringRef Value) override { Gen->HandleDetectMismatch(Name, Value); } - virtual void HandleDependentLibrary(llvm::StringRef Opts) { + void HandleDependentLibrary(llvm::StringRef Opts) override { Gen->HandleDependentLibrary(Opts); } @@ -202,8 +218,36 @@ namespace clang { ((BackendConsumer*)Context)->InlineAsmDiagHandler2(SM, Loc); } + static void DiagnosticHandler(const llvm::DiagnosticInfo &DI, + void *Context) { + ((BackendConsumer *)Context)->DiagnosticHandlerImpl(DI); + } + void InlineAsmDiagHandler2(const llvm::SMDiagnostic &, SourceLocation LocCookie); + + void DiagnosticHandlerImpl(const llvm::DiagnosticInfo &DI); + /// \brief Specialized handler for InlineAsm diagnostic. + /// \return True if the diagnostic has been successfully reported, false + /// otherwise. + bool InlineAsmDiagHandler(const llvm::DiagnosticInfoInlineAsm &D); + /// \brief Specialized handler for StackSize diagnostic. + /// \return True if the diagnostic has been successfully reported, false + /// otherwise. + bool StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D); + /// \brief Specialized handlers for optimization remarks. + /// Note that these handlers only accept remarks and they always handle + /// them. 
+ void EmitOptimizationMessage(const llvm::DiagnosticInfoOptimizationBase &D, + unsigned DiagID); + void + OptimizationRemarkHandler(const llvm::DiagnosticInfoOptimizationRemark &D); + void OptimizationRemarkHandler( + const llvm::DiagnosticInfoOptimizationRemarkMissed &D); + void OptimizationRemarkHandler( + const llvm::DiagnosticInfoOptimizationRemarkAnalysis &D); + void OptimizationFailureHandler( + const llvm::DiagnosticInfoOptimizationFailure &D); }; void BackendConsumer::anchor() {} @@ -224,13 +268,15 @@ static FullSourceLoc ConvertBackendLocation(const llvm::SMDiagnostic &D, LSM.getMemoryBuffer(LSM.FindBufferContainingLoc(D.getLoc())); // Create the copy and transfer ownership to clang::SourceManager. + // TODO: Avoid copying files into memory. llvm::MemoryBuffer *CBuf = llvm::MemoryBuffer::getMemBufferCopy(LBuf->getBuffer(), LBuf->getBufferIdentifier()); - FileID FID = CSM.createFileIDForMemBuffer(CBuf); + // FIXME: Keep a file ID map instead of creating new IDs for each location. + FileID FID = CSM.createFileID(CBuf); // Translate the offset into the file. - unsigned Offset = D.getLoc().getPointer() - LBuf->getBufferStart(); + unsigned Offset = D.getLoc().getPointer() - LBuf->getBufferStart(); SourceLocation NewLoc = CSM.getLocForStartOfFile(FID).getLocWithOffset(Offset); return FullSourceLoc(NewLoc, CSM); @@ -254,13 +300,24 @@ void BackendConsumer::InlineAsmDiagHandler2(const llvm::SMDiagnostic &D, FullSourceLoc Loc; if (D.getLoc() != SMLoc()) Loc = ConvertBackendLocation(D, Context->getSourceManager()); - + unsigned DiagID; + switch (D.getKind()) { + case llvm::SourceMgr::DK_Error: + DiagID = diag::err_fe_inline_asm; + break; + case llvm::SourceMgr::DK_Warning: + DiagID = diag::warn_fe_inline_asm; + break; + case llvm::SourceMgr::DK_Note: + DiagID = diag::note_fe_inline_asm; + break; + } // If this problem has clang-level source location information, report the - // issue as being an error in the source with a note showing the instantiated + // issue in the source with a note showing the instantiated // code. if (LocCookie.isValid()) { - Diags.Report(LocCookie, diag::err_fe_inline_asm).AddString(Message); + Diags.Report(LocCookie, DiagID).AddString(Message); if (D.getLoc().isValid()) { DiagnosticBuilder B = Diags.Report(Loc, diag::note_fe_inline_asm_here); @@ -276,16 +333,224 @@ void BackendConsumer::InlineAsmDiagHandler2(const llvm::SMDiagnostic &D, return; } - // Otherwise, report the backend error as occurring in the generated .s file. - // If Loc is invalid, we still need to report the error, it just gets no + // Otherwise, report the backend issue as occurring in the generated .s file. + // If Loc is invalid, we still need to report the issue, it just gets no // location info. 
- Diags.Report(Loc, diag::err_fe_inline_asm).AddString(Message); + Diags.Report(Loc, DiagID).AddString(Message); } -// +#define ComputeDiagID(Severity, GroupName, DiagID) \ + do { \ + switch (Severity) { \ + case llvm::DS_Error: \ + DiagID = diag::err_fe_##GroupName; \ + break; \ + case llvm::DS_Warning: \ + DiagID = diag::warn_fe_##GroupName; \ + break; \ + case llvm::DS_Remark: \ + llvm_unreachable("'remark' severity not expected"); \ + break; \ + case llvm::DS_Note: \ + DiagID = diag::note_fe_##GroupName; \ + break; \ + } \ + } while (false) + +#define ComputeDiagRemarkID(Severity, GroupName, DiagID) \ + do { \ + switch (Severity) { \ + case llvm::DS_Error: \ + DiagID = diag::err_fe_##GroupName; \ + break; \ + case llvm::DS_Warning: \ + DiagID = diag::warn_fe_##GroupName; \ + break; \ + case llvm::DS_Remark: \ + DiagID = diag::remark_fe_##GroupName; \ + break; \ + case llvm::DS_Note: \ + DiagID = diag::note_fe_##GroupName; \ + break; \ + } \ + } while (false) + +bool +BackendConsumer::InlineAsmDiagHandler(const llvm::DiagnosticInfoInlineAsm &D) { + unsigned DiagID; + ComputeDiagID(D.getSeverity(), inline_asm, DiagID); + std::string Message = D.getMsgStr().str(); + + // If this problem has clang-level source location information, report the + // issue as being a problem in the source with a note showing the instantiated + // code. + SourceLocation LocCookie = + SourceLocation::getFromRawEncoding(D.getLocCookie()); + if (LocCookie.isValid()) + Diags.Report(LocCookie, DiagID).AddString(Message); + else { + // Otherwise, report the backend diagnostic as occurring in the generated + // .s file. + // If Loc is invalid, we still need to report the diagnostic, it just gets + // no location info. + FullSourceLoc Loc; + Diags.Report(Loc, DiagID).AddString(Message); + } + // We handled all the possible severities. + return true; +} + +bool +BackendConsumer::StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D) { + if (D.getSeverity() != llvm::DS_Warning) + // For now, the only support we have for StackSize diagnostic is warning. + // We do not know how to format other severities. + return false; + + if (const Decl *ND = Gen->GetDeclForMangledName(D.getFunction().getName())) { + Diags.Report(ND->getASTContext().getFullLoc(ND->getLocation()), + diag::warn_fe_frame_larger_than) + << D.getStackSize() << Decl::castToDeclContext(ND); + return true; + } + + return false; +} + +void BackendConsumer::EmitOptimizationMessage( + const llvm::DiagnosticInfoOptimizationBase &D, unsigned DiagID) { + // We only support warnings and remarks. + assert(D.getSeverity() == llvm::DS_Remark || + D.getSeverity() == llvm::DS_Warning); + + SourceManager &SourceMgr = Context->getSourceManager(); + FileManager &FileMgr = SourceMgr.getFileManager(); + StringRef Filename; + unsigned Line, Column; + D.getLocation(&Filename, &Line, &Column); + SourceLocation DILoc; + const FileEntry *FE = FileMgr.getFile(Filename); + if (FE && Line > 0) { + // If -gcolumn-info was not used, Column will be 0. This upsets the + // source manager, so pass 1 if Column is not set. + DILoc = SourceMgr.translateFileLineCol(FE, Line, Column ? Column : 1); + } + + // If a location isn't available, try to approximate it using the associated + // function definition. We use the definition's right brace to differentiate + // from diagnostics that genuinely relate to the function itself. 
+ FullSourceLoc Loc(DILoc, SourceMgr); + if (Loc.isInvalid()) + if (const Decl *FD = Gen->GetDeclForMangledName(D.getFunction().getName())) + Loc = FD->getASTContext().getFullLoc(FD->getBodyRBrace()); + + Diags.Report(Loc, DiagID) + << AddFlagValue(D.getPassName() ? D.getPassName() : "") + << D.getMsg().str(); + + if (DILoc.isInvalid()) + // If we were not able to translate the file:line:col information + // back to a SourceLocation, at least emit a note stating that + // we could not translate this location. This can happen in the + // case of #line directives. + Diags.Report(Loc, diag::note_fe_backend_optimization_remark_invalid_loc) + << Filename << Line << Column; +} + +void BackendConsumer::OptimizationRemarkHandler( + const llvm::DiagnosticInfoOptimizationRemark &D) { + // Optimization remarks are active only if the -Rpass flag has a regular + // expression that matches the name of the pass name in \p D. + if (CodeGenOpts.OptimizationRemarkPattern && + CodeGenOpts.OptimizationRemarkPattern->match(D.getPassName())) + EmitOptimizationMessage(D, diag::remark_fe_backend_optimization_remark); +} + +void BackendConsumer::OptimizationRemarkHandler( + const llvm::DiagnosticInfoOptimizationRemarkMissed &D) { + // Missed optimization remarks are active only if the -Rpass-missed + // flag has a regular expression that matches the name of the pass + // name in \p D. + if (CodeGenOpts.OptimizationRemarkMissedPattern && + CodeGenOpts.OptimizationRemarkMissedPattern->match(D.getPassName())) + EmitOptimizationMessage(D, + diag::remark_fe_backend_optimization_remark_missed); +} + +void BackendConsumer::OptimizationRemarkHandler( + const llvm::DiagnosticInfoOptimizationRemarkAnalysis &D) { + // Optimization analysis remarks are active only if the -Rpass-analysis + // flag has a regular expression that matches the name of the pass + // name in \p D. + if (CodeGenOpts.OptimizationRemarkAnalysisPattern && + CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName())) + EmitOptimizationMessage( + D, diag::remark_fe_backend_optimization_remark_analysis); +} + +void BackendConsumer::OptimizationFailureHandler( + const llvm::DiagnosticInfoOptimizationFailure &D) { + EmitOptimizationMessage(D, diag::warn_fe_backend_optimization_failure); +} + +/// \brief This function is invoked when the backend needs +/// to report something to the user. +void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) { + unsigned DiagID = diag::err_fe_inline_asm; + llvm::DiagnosticSeverity Severity = DI.getSeverity(); + // Get the diagnostic ID based. + switch (DI.getKind()) { + case llvm::DK_InlineAsm: + if (InlineAsmDiagHandler(cast<DiagnosticInfoInlineAsm>(DI))) + return; + ComputeDiagID(Severity, inline_asm, DiagID); + break; + case llvm::DK_StackSize: + if (StackSizeDiagHandler(cast<DiagnosticInfoStackSize>(DI))) + return; + ComputeDiagID(Severity, backend_frame_larger_than, DiagID); + break; + case llvm::DK_OptimizationRemark: + // Optimization remarks are always handled completely by this + // handler. There is no generic way of emitting them. + OptimizationRemarkHandler(cast<DiagnosticInfoOptimizationRemark>(DI)); + return; + case llvm::DK_OptimizationRemarkMissed: + // Optimization remarks are always handled completely by this + // handler. There is no generic way of emitting them. + OptimizationRemarkHandler(cast<DiagnosticInfoOptimizationRemarkMissed>(DI)); + return; + case llvm::DK_OptimizationRemarkAnalysis: + // Optimization remarks are always handled completely by this + // handler. 
There is no generic way of emitting them. + OptimizationRemarkHandler( + cast<DiagnosticInfoOptimizationRemarkAnalysis>(DI)); + return; + case llvm::DK_OptimizationFailure: + // Optimization failures are always handled completely by this + // handler. + OptimizationFailureHandler(cast<DiagnosticInfoOptimizationFailure>(DI)); + return; + default: + // Plugin IDs are not bound to any value as they are set dynamically. + ComputeDiagRemarkID(Severity, backend_plugin, DiagID); + break; + } + std::string MsgStorage; + { + raw_string_ostream Stream(MsgStorage); + DiagnosticPrinterRawOStream DP(Stream); + DI.print(DP); + } + + // Report the backend message using the usual diagnostic mechanism. + FullSourceLoc Loc; + Diags.Report(Loc, DiagID).AddString(MsgStorage); +} +#undef ComputeDiagID CodeGenAction::CodeGenAction(unsigned _Act, LLVMContext *_VMContext) - : Act(_Act), LinkModule(0), + : Act(_Act), LinkModule(nullptr), VMContext(_VMContext ? _VMContext : new LLVMContext), OwnsVMContext(!_VMContext) {} @@ -310,9 +575,7 @@ void CodeGenAction::EndSourceFileAction() { TheModule.reset(BEConsumer->takeModule()); } -llvm::Module *CodeGenAction::takeModule() { - return TheModule.take(); -} +llvm::Module *CodeGenAction::takeModule() { return TheModule.release(); } llvm::LLVMContext *CodeGenAction::takeLLVMContext() { OwnsVMContext = false; @@ -330,8 +593,9 @@ static raw_ostream *GetOutputStream(CompilerInstance &CI, case Backend_EmitBC: return CI.createDefaultOutputFile(true, InFile, "bc"); case Backend_EmitNothing: - return 0; + return nullptr; case Backend_EmitMCNull: + return CI.createNullOutputFile(); case Backend_EmitObj: return CI.createDefaultOutputFile(true, InFile, "o"); } @@ -342,9 +606,9 @@ static raw_ostream *GetOutputStream(CompilerInstance &CI, ASTConsumer *CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { BackendAction BA = static_cast<BackendAction>(Act); - OwningPtr<raw_ostream> OS(GetOutputStream(CI, InFile, BA)); + std::unique_ptr<raw_ostream> OS(GetOutputStream(CI, InFile, BA)); if (BA != Backend_EmitNothing && !OS) - return 0; + return nullptr; llvm::Module *LinkModuleToUse = LinkModule; @@ -359,23 +623,23 @@ ASTConsumer *CodeGenAction::CreateASTConsumer(CompilerInstance &CI, if (!BCBuf) { CI.getDiagnostics().Report(diag::err_cannot_open_file) << LinkBCFile << ErrorStr; - return 0; + return nullptr; } - LinkModuleToUse = getLazyBitcodeModule(BCBuf, *VMContext, &ErrorStr); - if (!LinkModuleToUse) { + ErrorOr<llvm::Module *> ModuleOrErr = + getLazyBitcodeModule(BCBuf, *VMContext); + if (std::error_code EC = ModuleOrErr.getError()) { CI.getDiagnostics().Report(diag::err_cannot_open_file) - << LinkBCFile << ErrorStr; - return 0; + << LinkBCFile << EC.message(); + return nullptr; } + LinkModuleToUse = ModuleOrErr.get(); } - BEConsumer = - new BackendConsumer(BA, CI.getDiagnostics(), - CI.getCodeGenOpts(), CI.getTargetOpts(), - CI.getLangOpts(), - CI.getFrontendOpts().ShowTimers, InFile, - LinkModuleToUse, OS.take(), *VMContext); + BEConsumer = new BackendConsumer(BA, CI.getDiagnostics(), CI.getCodeGenOpts(), + CI.getTargetOpts(), CI.getLangOpts(), + CI.getFrontendOpts().ShowTimers, InFile, + LinkModuleToUse, OS.release(), *VMContext); return BEConsumer; } @@ -390,49 +654,48 @@ void CodeGenAction::ExecuteAction() { bool Invalid; SourceManager &SM = CI.getSourceManager(); - const llvm::MemoryBuffer *MainFile = SM.getBuffer(SM.getMainFileID(), - &Invalid); + FileID FID = SM.getMainFileID(); + llvm::MemoryBuffer *MainFile = SM.getBuffer(FID, &Invalid); if (Invalid) 
return; - // FIXME: This is stupid, IRReader shouldn't take ownership. - llvm::MemoryBuffer *MainFileCopy = - llvm::MemoryBuffer::getMemBufferCopy(MainFile->getBuffer(), - getCurrentFile()); - llvm::SMDiagnostic Err; - TheModule.reset(ParseIR(MainFileCopy, Err, *VMContext)); + TheModule.reset(ParseIR(MainFile, Err, *VMContext)); if (!TheModule) { - // Translate from the diagnostic info to the SourceManager location. - SourceLocation Loc = SM.translateFileLineCol( - SM.getFileEntryForID(SM.getMainFileID()), Err.getLineNo(), - Err.getColumnNo() + 1); + // Translate from the diagnostic info to the SourceManager location if + // available. + // TODO: Unify this with ConvertBackendLocation() + SourceLocation Loc; + if (Err.getLineNo() > 0) { + assert(Err.getColumnNo() >= 0); + Loc = SM.translateFileLineCol(SM.getFileEntryForID(FID), + Err.getLineNo(), Err.getColumnNo() + 1); + } - // Get a custom diagnostic for the error. We strip off a leading - // diagnostic code if there is one. + // Strip off a leading diagnostic code if there is one. StringRef Msg = Err.getMessage(); if (Msg.startswith("error: ")) Msg = Msg.substr(7); - // Escape '%', which is interpreted as a format character. - SmallString<128> EscapedMessage; - for (unsigned i = 0, e = Msg.size(); i != e; ++i) { - if (Msg[i] == '%') - EscapedMessage += '%'; - EscapedMessage += Msg[i]; - } + unsigned DiagID = + CI.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0"); + CI.getDiagnostics().Report(Loc, DiagID) << Msg; + return; + } + const TargetOptions &TargetOpts = CI.getTargetOpts(); + if (TheModule->getTargetTriple() != TargetOpts.Triple) { unsigned DiagID = CI.getDiagnostics().getCustomDiagID( - DiagnosticsEngine::Error, EscapedMessage); + DiagnosticsEngine::Warning, + "overriding the module target triple with %0"); - CI.getDiagnostics().Report(Loc, DiagID); - return; + CI.getDiagnostics().Report(SourceLocation(), DiagID) << TargetOpts.Triple; + TheModule->setTargetTriple(TargetOpts.Triple); } - EmitBackendOutput(CI.getDiagnostics(), CI.getCodeGenOpts(), - CI.getTargetOpts(), CI.getLangOpts(), - TheModule.get(), - BA, OS); + EmitBackendOutput(CI.getDiagnostics(), CI.getCodeGenOpts(), TargetOpts, + CI.getLangOpts(), CI.getTarget().getTargetDescription(), + TheModule.get(), BA, OS); return; } diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp index ce1b44559dcc..5ca3a78bb4fa 100644 --- a/lib/CodeGen/CodeGenFunction.cpp +++ b/lib/CodeGen/CodeGenFunction.cpp @@ -15,13 +15,14 @@ #include "CGCUDARuntime.h" #include "CGCXXABI.h" #include "CGDebugInfo.h" +#include "CGOpenMPRuntime.h" #include "CodeGenModule.h" +#include "CodeGenPGO.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/StmtCXX.h" -#include "clang/Basic/OpenCL.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/Frontend/CodeGenOptions.h" @@ -34,22 +35,23 @@ using namespace CodeGen; CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext) : CodeGenTypeCache(cgm), CGM(cgm), Target(cgm.getTarget()), - Builder(cgm.getModule().getContext()), CapturedStmtInfo(0), - SanitizePerformTypeCheck(CGM.getSanOpts().Null | - CGM.getSanOpts().Alignment | - CGM.getSanOpts().ObjectSize | - CGM.getSanOpts().Vptr), - SanOpts(&CGM.getSanOpts()), AutoreleaseResult(false), BlockInfo(0), - BlockPointer(0), LambdaThisCaptureField(0), NormalCleanupDest(0), - NextCleanupDestIndex(1), FirstBlockInfo(0), 
EHResumeBlock(0), - ExceptionSlot(0), EHSelectorSlot(0), DebugInfo(CGM.getModuleDebugInfo()), - DisableDebugInfo(false), DidCallStackSave(false), IndirectBranch(0), - SwitchInsn(0), CaseRangeBlock(0), UnreachableBlock(0), NumReturnExprs(0), - NumSimpleReturnExprs(0), CXXABIThisDecl(0), CXXABIThisValue(0), - CXXThisValue(0), CXXDefaultInitExprThis(0), - CXXStructorImplicitParamDecl(0), CXXStructorImplicitParamValue(0), - OutermostConditional(0), CurLexicalScope(0), TerminateLandingPad(0), - TerminateHandler(0), TrapBB(0) { + Builder(cgm.getModule().getContext(), llvm::ConstantFolder(), + CGBuilderInserterTy(this)), + CapturedStmtInfo(nullptr), SanOpts(&CGM.getLangOpts().Sanitize), + IsSanitizerScope(false), AutoreleaseResult(false), BlockInfo(nullptr), + BlockPointer(nullptr), LambdaThisCaptureField(nullptr), + NormalCleanupDest(nullptr), NextCleanupDestIndex(1), + FirstBlockInfo(nullptr), EHResumeBlock(nullptr), ExceptionSlot(nullptr), + EHSelectorSlot(nullptr), DebugInfo(CGM.getModuleDebugInfo()), + DisableDebugInfo(false), DidCallStackSave(false), IndirectBranch(nullptr), + PGO(cgm), SwitchInsn(nullptr), SwitchWeights(nullptr), + CaseRangeBlock(nullptr), UnreachableBlock(nullptr), NumReturnExprs(0), + NumSimpleReturnExprs(0), CXXABIThisDecl(nullptr), + CXXABIThisValue(nullptr), CXXThisValue(nullptr), + CXXDefaultInitExprThis(nullptr), CXXStructorImplicitParamDecl(nullptr), + CXXStructorImplicitParamValue(nullptr), OutermostConditional(nullptr), + CurLexicalScope(nullptr), TerminateLandingPad(nullptr), + TerminateHandler(nullptr), TrapBB(nullptr) { if (!suppressNewContext) CGM.getCXXABI().getMangleContext().startNewFunction(); @@ -71,6 +73,10 @@ CodeGenFunction::~CodeGenFunction() { // something. if (FirstBlockInfo) destroyBlockInfos(FirstBlockInfo); + + if (getLangOpts().OpenMP) { + CGM.getOpenMPRuntime().FunctionFinished(*this); + } } @@ -157,7 +163,7 @@ void CodeGenFunction::EmitReturnBlock() { // cleans up functions which started with a unified return block. if (ReturnBlock.getBlock()->hasOneUse()) { llvm::BranchInst *BI = - dyn_cast<llvm::BranchInst>(*ReturnBlock.getBlock()->use_begin()); + dyn_cast<llvm::BranchInst>(*ReturnBlock.getBlock()->user_begin()); if (BI && BI->isUnconditional() && BI->getSuccessor(0) == ReturnBlock.getBlock()) { // Reset insertion point, including debug location, and delete the @@ -255,7 +261,7 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { // Remove the AllocaInsertPt instruction, which is just a convenience for us. llvm::Instruction *Ptr = AllocaInsertPt; - AllocaInsertPt = 0; + AllocaInsertPt = nullptr; Ptr->eraseFromParent(); // If someone took the address of a label but never did an indirect goto, we @@ -275,6 +281,14 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { if (CGM.getCodeGenOpts().EmitDeclMetadata) EmitDeclMetadata(); + + for (SmallVectorImpl<std::pair<llvm::Instruction *, llvm::Value *> >::iterator + I = DeferredReplacements.begin(), + E = DeferredReplacements.end(); + I != E; ++I) { + I->first->replaceAllUsesWith(I->second); + I->first->eraseFromParent(); + } } /// ShouldInstrumentFunction - Return true if the current function should be @@ -330,6 +344,8 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, // Each MDNode is a list in the form of "key", N number of values which is // the same number of values as their are kernel arguments. + const PrintingPolicy &Policy = ASTCtx.getPrintingPolicy(); + // MDNode for the kernel argument address space qualifiers. 
SmallVector<llvm::Value*, 8> addressQuals; addressQuals.push_back(llvm::MDString::get(Context, "kernel_arg_addr_space")); @@ -363,7 +379,8 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, pointeeTy.getAddressSpace()))); // Get argument type name. - std::string typeName = pointeeTy.getUnqualifiedType().getAsString() + "*"; + std::string typeName = + pointeeTy.getUnqualifiedType().getAsString(Policy) + "*"; // Turn "unsigned type" to "utype" std::string::size_type pos = typeName.find("unsigned"); @@ -381,10 +398,15 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, if (pointeeTy.isVolatileQualified()) typeQuals += typeQuals.empty() ? "volatile" : " volatile"; } else { - addressQuals.push_back(Builder.getInt32(0)); + uint32_t AddrSpc = 0; + if (ty->isImageType()) + AddrSpc = + CGM.getContext().getTargetAddressSpace(LangAS::opencl_global); + + addressQuals.push_back(Builder.getInt32(AddrSpc)); // Get argument type name. - std::string typeName = ty.getUnqualifiedType().getAsString(); + std::string typeName = ty.getUnqualifiedType().getAsString(Policy); // Turn "unsigned type" to "utype" std::string::size_type pos = typeName.find("unsigned"); @@ -399,16 +421,17 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, if (ty.isVolatileQualified()) typeQuals += typeQuals.empty() ? "volatile" : " volatile"; } - + argTypeQuals.push_back(llvm::MDString::get(Context, typeQuals)); // Get image access qualifier: if (ty->isImageType()) { - if (parm->hasAttr<OpenCLImageAccessAttr>() && - parm->getAttr<OpenCLImageAccessAttr>()->getAccess() == CLIA_write_only) + const OpenCLImageAccessAttr *A = parm->getAttr<OpenCLImageAccessAttr>(); + if (A && A->isWriteOnly()) accessQuals.push_back(llvm::MDString::get(Context, "write_only")); else accessQuals.push_back(llvm::MDString::get(Context, "read_only")); + // FIXME: what about read_write? } else accessQuals.push_back(llvm::MDString::get(Context, "none")); @@ -438,16 +461,15 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, GenOpenCLArgMetadata(FD, Fn, CGM, Context, kernelMDArgs, Builder, getContext()); - if (FD->hasAttr<VecTypeHintAttr>()) { - VecTypeHintAttr *attr = FD->getAttr<VecTypeHintAttr>(); - QualType hintQTy = attr->getTypeHint(); + if (const VecTypeHintAttr *A = FD->getAttr<VecTypeHintAttr>()) { + QualType hintQTy = A->getTypeHint(); const ExtVectorType *hintEltQTy = hintQTy->getAs<ExtVectorType>(); bool isSignedInteger = hintQTy->isSignedIntegerType() || (hintEltQTy && hintEltQTy->getElementType()->isSignedIntegerType()); llvm::Value *attrMDArgs[] = { llvm::MDString::get(Context, "vec_type_hint"), - llvm::UndefValue::get(CGM.getTypes().ConvertType(attr->getTypeHint())), + llvm::UndefValue::get(CGM.getTypes().ConvertType(A->getTypeHint())), llvm::ConstantInt::get( llvm::IntegerType::get(Context, 32), llvm::APInt(32, (uint64_t)(isSignedInteger ? 
1 : 0))) @@ -455,24 +477,22 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs)); } - if (FD->hasAttr<WorkGroupSizeHintAttr>()) { - WorkGroupSizeHintAttr *attr = FD->getAttr<WorkGroupSizeHintAttr>(); + if (const WorkGroupSizeHintAttr *A = FD->getAttr<WorkGroupSizeHintAttr>()) { llvm::Value *attrMDArgs[] = { llvm::MDString::get(Context, "work_group_size_hint"), - Builder.getInt32(attr->getXDim()), - Builder.getInt32(attr->getYDim()), - Builder.getInt32(attr->getZDim()) + Builder.getInt32(A->getXDim()), + Builder.getInt32(A->getYDim()), + Builder.getInt32(A->getZDim()) }; kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs)); } - if (FD->hasAttr<ReqdWorkGroupSizeAttr>()) { - ReqdWorkGroupSizeAttr *attr = FD->getAttr<ReqdWorkGroupSizeAttr>(); + if (const ReqdWorkGroupSizeAttr *A = FD->getAttr<ReqdWorkGroupSizeAttr>()) { llvm::Value *attrMDArgs[] = { llvm::MDString::get(Context, "reqd_work_group_size"), - Builder.getInt32(attr->getXDim()), - Builder.getInt32(attr->getYDim()), - Builder.getInt32(attr->getZDim()) + Builder.getInt32(A->getXDim()), + Builder.getInt32(A->getYDim()), + Builder.getInt32(A->getZDim()) }; kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs)); } @@ -483,37 +503,55 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, OpenCLKernelMetadata->addOperand(kernelMDNode); } +/// Determine whether the function F ends with a return stmt. +static bool endsWithReturn(const Decl* F) { + const Stmt *Body = nullptr; + if (auto *FD = dyn_cast_or_null<FunctionDecl>(F)) + Body = FD->getBody(); + else if (auto *OMD = dyn_cast_or_null<ObjCMethodDecl>(F)) + Body = OMD->getBody(); + + if (auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) { + auto LastStmt = CS->body_rbegin(); + if (LastStmt != CS->body_rend()) + return isa<ReturnStmt>(*LastStmt); + } + return false; +} + void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, + SourceLocation Loc, SourceLocation StartLoc) { const Decl *D = GD.getDecl(); DidCallStackSave = false; CurCodeDecl = D; - CurFuncDecl = (D ? D->getNonClosureContext() : 0); + CurFuncDecl = (D ? D->getNonClosureContext() : nullptr); FnRetTy = RetTy; CurFn = Fn; CurFnInfo = &FnInfo; assert(CurFn->isDeclaration() && "Function already has body?"); - if (CGM.getSanitizerBlacklist().isIn(*Fn)) { + if (CGM.getSanitizerBlacklist().isIn(*Fn)) SanOpts = &SanitizerOptions::Disabled; - SanitizePerformTypeCheck = false; - } // Pass inline keyword to optimizer if it appears explicitly on any - // declaration. - if (!CGM.getCodeGenOpts().NoInline) - if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) - for (FunctionDecl::redecl_iterator RI = FD->redecls_begin(), - RE = FD->redecls_end(); RI != RE; ++RI) + // declaration. Also, in the case of -fno-inline attach NoInline + // attribute to all function that are not marked AlwaysInline. + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { + if (!CGM.getCodeGenOpts().NoInline) { + for (auto RI : FD->redecls()) if (RI->isInlineSpecified()) { Fn->addFnAttr(llvm::Attribute::InlineHint); break; } + } else if (!FD->hasAttr<AlwaysInlineAttr>()) + Fn->addFnAttr(llvm::Attribute::NoInline); + } if (getLangOpts().OpenCL) { // Add metadata for a kernel function. 
@@ -562,9 +600,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType FnType = getContext().getFunctionType(RetTy, ArgTypes, FunctionProtoType::ExtProtoInfo()); - - DI->setLocation(StartLoc); - DI->EmitFunctionStart(GD, FnType, CurFn, Builder); + DI->EmitFunctionStart(GD, Loc, StartLoc, FnType, CurFn, Builder); } if (ShouldInstrumentFunction()) @@ -575,12 +611,27 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, if (RetTy->isVoidType()) { // Void type; nothing to return. - ReturnValue = 0; + ReturnValue = nullptr; + + // Count the implicit return. + if (!endsWithReturn(D)) + ++NumReturnExprs; } else if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect && !hasScalarEvaluationKind(CurFnInfo->getReturnType())) { // Indirect aggregate return; emit returned value directly into sret slot. // This reduces code size, and affects correctness in C++. - ReturnValue = CurFn->arg_begin(); + auto AI = CurFn->arg_begin(); + if (CurFnInfo->getReturnInfo().isSRetAfterThis()) + ++AI; + ReturnValue = AI; + } else if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::InAlloca && + !hasScalarEvaluationKind(CurFnInfo->getReturnType())) { + // Load the sret pointer from the argument struct and return into that. + unsigned Idx = CurFnInfo->getReturnInfo().getInAllocaFieldIndex(); + llvm::Function::arg_iterator EI = CurFn->arg_end(); + --EI; + llvm::Value *Addr = Builder.CreateStructGEP(EI, Idx); + ReturnValue = Builder.CreateLoad(Addr, "agg.result"); } else { ReturnValue = CreateIRTemp(RetTy, "retval"); @@ -645,12 +696,34 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args, const Stmt *Body) { + RegionCounter Cnt = getPGORegionCounter(Body); + Cnt.beginRegion(Builder); if (const CompoundStmt *S = dyn_cast<CompoundStmt>(Body)) EmitCompoundStmtWithoutScope(*S); else EmitStmt(Body); } +/// When instrumenting to collect profile data, the counts for some blocks +/// such as switch cases need to not include the fall-through counts, so +/// emit a branch around the instrumentation code. When not instrumenting, +/// this just calls EmitBlock(). +void CodeGenFunction::EmitBlockWithFallThrough(llvm::BasicBlock *BB, + RegionCounter &Cnt) { + llvm::BasicBlock *SkipCountBB = nullptr; + if (HaveInsertPoint() && CGM.getCodeGenOpts().ProfileInstrGenerate) { + // When instrumenting for profiling, the fallthrough to certain + // statements needs to skip over the instrumentation code so that we + // get an accurate count. + SkipCountBB = createBasicBlock("skipcount"); + EmitBranch(SkipCountBB); + } + EmitBlock(BB); + Cnt.beginRegion(Builder, /*AddIncomingFallThrough=*/true); + if (SkipCountBB) + EmitBlock(SkipCountBB); +} + /// Tries to mark the given function nounwind based on the /// non-existence of any throwing calls within it. We believe this is /// lightweight enough to do at -O0. @@ -688,30 +761,47 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, // Check if we should generate debug info for this function. 
if (FD->hasAttr<NoDebugAttr>()) - DebugInfo = NULL; // disable debug info indefinitely for this function + DebugInfo = nullptr; // disable debug info indefinitely for this function FunctionArgList Args; - QualType ResTy = FD->getResultType(); + QualType ResTy = FD->getReturnType(); CurGD = GD; - const CXXMethodDecl *MD; - if ((MD = dyn_cast<CXXMethodDecl>(FD)) && MD->isInstance()) { + const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD); + if (MD && MD->isInstance()) { if (CGM.getCXXABI().HasThisReturn(GD)) ResTy = MD->getThisType(getContext()); - CGM.getCXXABI().BuildInstanceFunctionParams(*this, ResTy, Args); + CGM.getCXXABI().buildThisParam(*this, Args); } for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i) Args.push_back(FD->getParamDecl(i)); + if (MD && (isa<CXXConstructorDecl>(MD) || isa<CXXDestructorDecl>(MD))) + CGM.getCXXABI().addImplicitStructorParams(*this, ResTy, Args); + SourceRange BodyRange; if (Stmt *Body = FD->getBody()) BodyRange = Body->getSourceRange(); CurEHLocation = BodyRange.getEnd(); + // Use the location of the start of the function to determine where + // the function definition is located. By default use the location + // of the declaration as the location for the subprogram. A function + // may lack a declaration in the source code if it is created by code + // gen. (examples: _GLOBAL__I_a, __cxx_global_array_dtor, thunk). + SourceLocation Loc = FD->getLocation(); + + // If this is a function specialization then use the pattern body + // as the location for the function. + if (const FunctionDecl *SpecDecl = FD->getTemplateInstantiationPattern()) + if (SpecDecl->hasBody(SpecDecl)) + Loc = SpecDecl->getLocation(); + // Emit the standard function prologue. - StartFunction(GD, ResTy, Fn, FnInfo, Args, BodyRange.getBegin()); + StartFunction(GD, ResTy, Fn, FnInfo, Args, Loc, BodyRange.getBegin()); // Generate the body of the function. + PGO.assignRegionCounters(GD.getDecl(), CurFn); if (isa<CXXDestructorDecl>(FD)) EmitDestructorBody(Args); else if (isa<CXXConstructorDecl>(FD)) @@ -753,12 +843,13 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, // If the '}' that terminates a function is reached, and the value of the // function call is used by the caller, the behavior is undefined. if (getLangOpts().CPlusPlus && !FD->hasImplicitReturnZero() && - !FD->getResultType()->isVoidType() && Builder.GetInsertBlock()) { - if (SanOpts->Return) + !FD->getReturnType()->isVoidType() && Builder.GetInsertBlock()) { + if (SanOpts->Return) { + SanitizerScope SanScope(this); EmitCheck(Builder.getFalse(), "missing_return", EmitCheckSourceLocation(FD->getLocation()), ArrayRef<llvm::Value *>(), CRK_Unrecoverable); - else if (CGM.getCodeGenOpts().OptimizationLevel == 0) + } else if (CGM.getCodeGenOpts().OptimizationLevel == 0) Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::trap)); Builder.CreateUnreachable(); Builder.ClearInsertionPoint(); @@ -771,6 +862,9 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, // a quick pass now to see if we can. if (!CurFn->doesNotThrow()) TryMarkNoThrow(CurFn); + + PGO.emitInstrumentationData(); + PGO.destroyRegionCounters(); } /// ContainsLabel - Return true if the statement contains a label in it. If @@ -778,7 +872,7 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, /// that we can just remove the code. bool CodeGenFunction::ContainsLabel(const Stmt *S, bool IgnoreCaseStmts) { // Null statement, not a label! 
- if (S == 0) return false; + if (!S) return false; // If this is a label, we have to emit the code, consider something like: // if (0) { ... foo: bar(); } goto foo; @@ -810,7 +904,7 @@ bool CodeGenFunction::ContainsLabel(const Stmt *S, bool IgnoreCaseStmts) { /// inside of it, this is fine. bool CodeGenFunction::containsBreak(const Stmt *S) { // Null statement, not a label! - if (S == 0) return false; + if (!S) return false; // If this is a switch or loop that defines its own break scope, then we can // include it and anything inside of it. @@ -869,19 +963,25 @@ ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APSInt &ResultInt) { /// void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, - llvm::BasicBlock *FalseBlock) { + llvm::BasicBlock *FalseBlock, + uint64_t TrueCount) { Cond = Cond->IgnoreParens(); if (const BinaryOperator *CondBOp = dyn_cast<BinaryOperator>(Cond)) { + // Handle X && Y in a condition. if (CondBOp->getOpcode() == BO_LAnd) { + RegionCounter Cnt = getPGORegionCounter(CondBOp); + // If we have "1 && X", simplify the code. "0 && X" would have constant // folded if the case was simple enough. bool ConstantBool = false; if (ConstantFoldsToSimpleInteger(CondBOp->getLHS(), ConstantBool) && ConstantBool) { // br(1 && X) -> br(X). - return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock); + Cnt.beginRegion(Builder); + return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, + TrueCount); } // If we have "X && 1", simplify the code to use an uncond branch. @@ -889,33 +989,42 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, if (ConstantFoldsToSimpleInteger(CondBOp->getRHS(), ConstantBool) && ConstantBool) { // br(X && 1) -> br(X). - return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock); + return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock, + TrueCount); } // Emit the LHS as a conditional. If the LHS conditional is false, we // want to jump to the FalseBlock. llvm::BasicBlock *LHSTrue = createBasicBlock("land.lhs.true"); + // The counter tells us how often we evaluate RHS, and all of TrueCount + // can be propagated to that branch. + uint64_t RHSCount = Cnt.getCount(); ConditionalEvaluation eval(*this); - EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock); + EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock, RHSCount); EmitBlock(LHSTrue); // Any temporaries created here are conditional. + Cnt.beginRegion(Builder); eval.begin(*this); - EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock); + EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, TrueCount); eval.end(*this); return; } if (CondBOp->getOpcode() == BO_LOr) { + RegionCounter Cnt = getPGORegionCounter(CondBOp); + // If we have "0 || X", simplify the code. "1 || X" would have constant // folded if the case was simple enough. bool ConstantBool = false; if (ConstantFoldsToSimpleInteger(CondBOp->getLHS(), ConstantBool) && !ConstantBool) { // br(0 || X) -> br(X). - return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock); + Cnt.beginRegion(Builder); + return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, + TrueCount); } // If we have "X || 0", simplify the code to use an uncond branch. @@ -923,20 +1032,28 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, if (ConstantFoldsToSimpleInteger(CondBOp->getRHS(), ConstantBool) && !ConstantBool) { // br(X || 0) -> br(X). 
- return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock); + return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock, + TrueCount); } // Emit the LHS as a conditional. If the LHS conditional is true, we // want to jump to the TrueBlock. llvm::BasicBlock *LHSFalse = createBasicBlock("lor.lhs.false"); + // We have the count for entry to the RHS and for the whole expression + // being true, so we can divy up True count between the short circuit and + // the RHS. + uint64_t LHSCount = Cnt.getParentCount() - Cnt.getCount(); + uint64_t RHSCount = TrueCount - LHSCount; ConditionalEvaluation eval(*this); - EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse); + EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse, LHSCount); EmitBlock(LHSFalse); // Any temporaries created here are conditional. + Cnt.beginRegion(Builder); eval.begin(*this); - EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock); + EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, RHSCount); + eval.end(*this); return; @@ -945,8 +1062,13 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, if (const UnaryOperator *CondUOp = dyn_cast<UnaryOperator>(Cond)) { // br(!x, t, f) -> br(x, f, t) - if (CondUOp->getOpcode() == UO_LNot) - return EmitBranchOnBoolExpr(CondUOp->getSubExpr(), FalseBlock, TrueBlock); + if (CondUOp->getOpcode() == UO_LNot) { + // Negate the count. + uint64_t FalseCount = PGO.getCurrentRegionCount() - TrueCount; + // Negate the condition and swap the destination blocks. + return EmitBranchOnBoolExpr(CondUOp->getSubExpr(), FalseBlock, TrueBlock, + FalseCount); + } } if (const ConditionalOperator *CondOp = dyn_cast<ConditionalOperator>(Cond)) { @@ -954,17 +1076,32 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *LHSBlock = createBasicBlock("cond.true"); llvm::BasicBlock *RHSBlock = createBasicBlock("cond.false"); + RegionCounter Cnt = getPGORegionCounter(CondOp); ConditionalEvaluation cond(*this); - EmitBranchOnBoolExpr(CondOp->getCond(), LHSBlock, RHSBlock); + EmitBranchOnBoolExpr(CondOp->getCond(), LHSBlock, RHSBlock, Cnt.getCount()); + + // When computing PGO branch weights, we only know the overall count for + // the true block. This code is essentially doing tail duplication of the + // naive code-gen, introducing new edges for which counts are not + // available. Divide the counts proportionally between the LHS and RHS of + // the conditional operator. + uint64_t LHSScaledTrueCount = 0; + if (TrueCount) { + double LHSRatio = Cnt.getCount() / (double) Cnt.getParentCount(); + LHSScaledTrueCount = TrueCount * LHSRatio; + } cond.begin(*this); EmitBlock(LHSBlock); - EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock); + Cnt.beginRegion(Builder); + EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock, + LHSScaledTrueCount); cond.end(*this); cond.begin(*this); EmitBlock(RHSBlock); - EmitBranchOnBoolExpr(CondOp->getRHS(), TrueBlock, FalseBlock); + EmitBranchOnBoolExpr(CondOp->getRHS(), TrueBlock, FalseBlock, + TrueCount - LHSScaledTrueCount); cond.end(*this); return; @@ -980,9 +1117,15 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, return; } + // Create branch weights based on the number of times we get here and the + // number of times the condition should be true. + uint64_t CurrentCount = std::max(PGO.getCurrentRegionCount(), TrueCount); + llvm::MDNode *Weights = PGO.createBranchWeights(TrueCount, + CurrentCount - TrueCount); + // Emit the code with the fully general case. 
llvm::Value *CondV = EvaluateExprAsBool(Cond); - Builder.CreateCondBr(CondV, TrueBlock, FalseBlock); + Builder.CreateCondBr(CondV, TrueBlock, FalseBlock, Weights); } /// ErrorUnsupported - Print out an error that codegen doesn't support the @@ -1075,7 +1218,7 @@ CodeGenFunction::EmitNullInitialization(llvm::Value *DestPtr, QualType Ty) { getContext().getAsArrayType(Ty))) { QualType eltType; llvm::Value *numElts; - llvm::tie(numElts, eltType) = getVLASize(vlaType); + std::tie(numElts, eltType) = getVLASize(vlaType); SizeVal = numElts; CharUnits eltSize = getContext().getTypeSizeInChars(eltType); @@ -1087,7 +1230,7 @@ CodeGenFunction::EmitNullInitialization(llvm::Value *DestPtr, QualType Ty) { } } else { SizeVal = CGM.getSize(Size); - vla = 0; + vla = nullptr; } // If the type contains a pointer to data member we can't memset it to zero. @@ -1124,7 +1267,7 @@ CodeGenFunction::EmitNullInitialization(llvm::Value *DestPtr, QualType Ty) { llvm::BlockAddress *CodeGenFunction::GetAddrOfLabel(const LabelDecl *L) { // Make sure that there is a block for the indirect goto. - if (IndirectBranch == 0) + if (!IndirectBranch) GetIndirectGotoBlock(); llvm::BasicBlock *BB = getJumpDestForLabel(L).getBlock(); @@ -1158,7 +1301,7 @@ llvm::Value *CodeGenFunction::emitArrayLength(const ArrayType *origArrayType, // If it's a VLA, we have to load the stored size. Note that // this is the size of the VLA in bytes, not its size in elements. - llvm::Value *numVLAElements = 0; + llvm::Value *numVLAElements = nullptr; if (isa<VariableArrayType>(arrayType)) { numVLAElements = getVLASize(cast<VariableArrayType>(arrayType)).first; @@ -1251,7 +1394,7 @@ CodeGenFunction::getVLASize(QualType type) { std::pair<llvm::Value*, QualType> CodeGenFunction::getVLASize(const VariableArrayType *type) { // The number of elements so far; always size_t. - llvm::Value *numElements = 0; + llvm::Value *numElements = nullptr; QualType elementType; do { @@ -1264,7 +1407,7 @@ CodeGenFunction::getVLASize(const VariableArrayType *type) { numElements = vlaSize; } else { // It's undefined behavior if this wraps around, so mark it that way. - // FIXME: Teach -fcatch-undefined-behavior to trap this. + // FIXME: Teach -fsanitize=undefined to trap this. numElements = Builder.CreateNUWMul(numElements, vlaSize); } } while ((type = getContext().getAsVariableArrayType(elementType))); @@ -1308,6 +1451,10 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { case Type::ObjCObjectPointer: llvm_unreachable("type class is never variably-modified!"); + case Type::Adjusted: + type = cast<AdjustedType>(ty)->getAdjustedType(); + break; + case Type::Decayed: type = cast<DecayedType>(ty)->getPointeeType(); break; @@ -1354,6 +1501,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { // greater than zero. 
if (SanOpts->VLABound && size->getType()->isSignedIntegerType()) { + SanitizerScope SanScope(this); llvm::Value *Zero = llvm::Constant::getNullValue(Size->getType()); llvm::Constant *StaticArgs[] = { EmitCheckSourceLocation(size->getLocStart()), @@ -1375,7 +1523,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { case Type::FunctionProto: case Type::FunctionNoProto: - type = cast<FunctionType>(ty)->getResultType(); + type = cast<FunctionType>(ty)->getReturnType(); break; case Type::Paren: @@ -1464,12 +1612,10 @@ void CodeGenFunction::EmitVarAnnotations(const VarDecl *D, llvm::Value *V) { assert(D->hasAttr<AnnotateAttr>() && "no annotate attribute"); // FIXME We create a new bitcast for every annotation because that's what // llvm-gcc was doing. - for (specific_attr_iterator<AnnotateAttr> - ai = D->specific_attr_begin<AnnotateAttr>(), - ae = D->specific_attr_end<AnnotateAttr>(); ai != ae; ++ai) + for (const auto *I : D->specific_attrs<AnnotateAttr>()) EmitAnnotationCall(CGM.getIntrinsic(llvm::Intrinsic::var_annotation), Builder.CreateBitCast(V, CGM.Int8PtrTy, V->getName()), - (*ai)->getAnnotation(), D->getLocation()); + I->getAnnotation(), D->getLocation()); } llvm::Value *CodeGenFunction::EmitFieldAnnotations(const FieldDecl *D, @@ -1479,15 +1625,13 @@ llvm::Value *CodeGenFunction::EmitFieldAnnotations(const FieldDecl *D, llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::ptr_annotation, CGM.Int8PtrTy); - for (specific_attr_iterator<AnnotateAttr> - ai = D->specific_attr_begin<AnnotateAttr>(), - ae = D->specific_attr_end<AnnotateAttr>(); ai != ae; ++ai) { + for (const auto *I : D->specific_attrs<AnnotateAttr>()) { // FIXME Always emit the cast inst so we can differentiate between // annotation on the first field of a struct and annotation on the struct // itself. 
if (VTy != CGM.Int8PtrTy) V = Builder.Insert(new llvm::BitCastInst(V, CGM.Int8PtrTy)); - V = EmitAnnotationCall(F, V, (*ai)->getAnnotation(), D->getLocation()); + V = EmitAnnotationCall(F, V, I->getAnnotation(), D->getLocation()); V = Builder.CreateBitCast(V, VTy); } @@ -1495,3 +1639,45 @@ llvm::Value *CodeGenFunction::EmitFieldAnnotations(const FieldDecl *D, } CodeGenFunction::CGCapturedStmtInfo::~CGCapturedStmtInfo() { } + +CodeGenFunction::SanitizerScope::SanitizerScope(CodeGenFunction *CGF) + : CGF(CGF) { + assert(!CGF->IsSanitizerScope); + CGF->IsSanitizerScope = true; +} + +CodeGenFunction::SanitizerScope::~SanitizerScope() { + CGF->IsSanitizerScope = false; +} + +void CodeGenFunction::InsertHelper(llvm::Instruction *I, + const llvm::Twine &Name, + llvm::BasicBlock *BB, + llvm::BasicBlock::iterator InsertPt) const { + LoopStack.InsertHelper(I); + if (IsSanitizerScope) { + I->setMetadata( + CGM.getModule().getMDKindID("nosanitize"), + llvm::MDNode::get(CGM.getLLVMContext(), ArrayRef<llvm::Value *>())); + } +} + +template <bool PreserveNames> +void CGBuilderInserter<PreserveNames>::InsertHelper( + llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB, + llvm::BasicBlock::iterator InsertPt) const { + llvm::IRBuilderDefaultInserter<PreserveNames>::InsertHelper(I, Name, BB, + InsertPt); + if (CGF) + CGF->InsertHelper(I, Name, BB, InsertPt); +} + +#ifdef NDEBUG +#define PreserveNames false +#else +#define PreserveNames true +#endif +template void CGBuilderInserter<PreserveNames>::InsertHelper( + llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB, + llvm::BasicBlock::iterator InsertPt) const; +#undef PreserveNames diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index db291e3b1ddd..59cc30da42c5 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -16,9 +16,11 @@ #include "CGBuilder.h" #include "CGDebugInfo.h" +#include "CGLoopInfo.h" #include "CGValue.h" -#include "EHScopeStack.h" #include "CodeGenModule.h" +#include "CodeGenPGO.h" +#include "EHScopeStack.h" #include "clang/AST/CharUnits.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" @@ -30,55 +32,55 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ValueHandle.h" namespace llvm { - class BasicBlock; - class LLVMContext; - class MDNode; - class Module; - class SwitchInst; - class Twine; - class Value; - class CallSite; +class BasicBlock; +class LLVMContext; +class MDNode; +class Module; +class SwitchInst; +class Twine; +class Value; +class CallSite; } namespace clang { - class ASTContext; - class BlockDecl; - class CXXDestructorDecl; - class CXXForRangeStmt; - class CXXTryStmt; - class Decl; - class LabelDecl; - class EnumConstantDecl; - class FunctionDecl; - class FunctionProtoType; - class LabelStmt; - class ObjCContainerDecl; - class ObjCInterfaceDecl; - class ObjCIvarDecl; - class ObjCMethodDecl; - class ObjCImplementationDecl; - class ObjCPropertyImplDecl; - class TargetInfo; - class TargetCodeGenInfo; - class VarDecl; - class ObjCForCollectionStmt; - class ObjCAtTryStmt; - class ObjCAtThrowStmt; - class ObjCAtSynchronizedStmt; - class ObjCAutoreleasePoolStmt; +class ASTContext; +class BlockDecl; +class CXXDestructorDecl; +class CXXForRangeStmt; +class CXXTryStmt; +class Decl; +class LabelDecl; +class EnumConstantDecl; +class FunctionDecl; +class FunctionProtoType; +class LabelStmt; +class 
ObjCContainerDecl; +class ObjCInterfaceDecl; +class ObjCIvarDecl; +class ObjCMethodDecl; +class ObjCImplementationDecl; +class ObjCPropertyImplDecl; +class TargetInfo; +class TargetCodeGenInfo; +class VarDecl; +class ObjCForCollectionStmt; +class ObjCAtTryStmt; +class ObjCAtThrowStmt; +class ObjCAtSynchronizedStmt; +class ObjCAutoreleasePoolStmt; namespace CodeGen { - class CodeGenTypes; - class CGFunctionInfo; - class CGRecordLayout; - class CGBlockInfo; - class CGCXXABI; - class BlockFlags; - class BlockFieldFlags; +class CodeGenTypes; +class CGFunctionInfo; +class CGRecordLayout; +class CGBlockInfo; +class CGCXXABI; +class BlockFlags; +class BlockFieldFlags; /// The kind of evaluation to perform on values of a particular /// type. Basically, is the code in CGExprScalar, CGExprComplex, or @@ -91,6 +93,19 @@ enum TypeEvaluationKind { TEK_Aggregate }; +class SuppressDebugLocation { + llvm::DebugLoc CurLoc; + llvm::IRBuilderBase &Builder; +public: + SuppressDebugLocation(llvm::IRBuilderBase &Builder) + : CurLoc(Builder.getCurrentDebugLocation()), Builder(Builder) { + Builder.SetCurrentDebugLocation(llvm::DebugLoc()); + } + ~SuppressDebugLocation() { + Builder.SetCurrentDebugLocation(CurLoc); + } +}; + /// CodeGenFunction - This class organizes the per-function state that is used /// while generating LLVM code. class CodeGenFunction : public CodeGenTypeCache { @@ -102,13 +117,13 @@ public: /// A jump destination is an abstract label, branching to which may /// require a jump out through normal cleanups. struct JumpDest { - JumpDest() : Block(0), ScopeDepth(), Index(0) {} + JumpDest() : Block(nullptr), ScopeDepth(), Index(0) {} JumpDest(llvm::BasicBlock *Block, EHScopeStack::stable_iterator Depth, unsigned Index) : Block(Block), ScopeDepth(Depth), Index(Index) {} - bool isValid() const { return Block != 0; } + bool isValid() const { return Block != nullptr; } llvm::BasicBlock *getBlock() const { return Block; } EHScopeStack::stable_iterator getScopeDepth() const { return ScopeDepth; } unsigned getDestIndex() const { return Index; } @@ -128,8 +143,15 @@ public: const TargetInfo &Target; typedef std::pair<llvm::Value *, llvm::Value *> ComplexPairTy; + LoopInfoStack LoopStack; CGBuilderTy Builder; + /// \brief CGBuilder insert helper. This function is called after an + /// instruction is created using Builder. + void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, + llvm::BasicBlock *BB, + llvm::BasicBlock::iterator InsertPt) const; + /// CurFuncDecl - Holds the Decl for the current outermost /// non-closure context. const Decl *CurFuncDecl; @@ -162,7 +184,7 @@ public: public: explicit CGCapturedStmtInfo(const CapturedStmt &S, CapturedRegionKind K = CR_Default) - : Kind(K), ThisValue(0), CXXThisFieldDecl(0) { + : Kind(K), ThisValue(nullptr), CXXThisFieldDecl(nullptr) { RecordDecl::field_iterator Field = S.getCapturedRecordDecl()->field_begin(); @@ -189,11 +211,13 @@ public: return CaptureFields.lookup(VD); } - bool isCXXThisExprCaptured() const { return CXXThisFieldDecl != 0; } + bool isCXXThisExprCaptured() const { return CXXThisFieldDecl != nullptr; } FieldDecl *getThisFieldDecl() const { return CXXThisFieldDecl; } /// \brief Emit the captured statement body. virtual void EmitBody(CodeGenFunction &CGF, Stmt *S) { + RegionCounter Cnt = CGF.getPGORegionCounter(S); + Cnt.beginRegion(CGF.Builder); CGF.EmitStmt(S); } @@ -220,13 +244,20 @@ public: /// potentially higher performance penalties. unsigned char BoundsChecking; - /// \brief Whether any type-checking sanitizers are enabled. 
If \c false, - /// calls to EmitTypeCheck can be skipped. - bool SanitizePerformTypeCheck; - /// \brief Sanitizer options to use for this function. const SanitizerOptions *SanOpts; + /// \brief True if CodeGen currently emits code implementing sanitizer checks. + bool IsSanitizerScope; + + /// \brief RAII object to set/unset CodeGenFunction::IsSanitizerScope. + class SanitizerScope { + CodeGenFunction *CGF; + public: + SanitizerScope(CodeGenFunction *CGF); + ~SanitizerScope(); + }; + /// In ARC, whether we should autorelease the return value. bool AutoreleaseResult; @@ -608,9 +639,9 @@ public: } void end(CodeGenFunction &CGF) { - assert(CGF.OutermostConditional != 0); + assert(CGF.OutermostConditional != nullptr); if (CGF.OutermostConditional == this) - CGF.OutermostConditional = 0; + CGF.OutermostConditional = nullptr; } /// Returns a block which will be executed prior to each @@ -622,7 +653,7 @@ public: /// isInConditionalBranch - Return true if we're currently emitting /// one branch or the other of a conditional expression. - bool isInConditionalBranch() const { return OutermostConditional != 0; } + bool isInConditionalBranch() const { return OutermostConditional != nullptr; } void setBeforeOutermostConditional(llvm::Value *value, llvm::Value *addr) { assert(isInConditionalBranch()); @@ -643,7 +674,7 @@ public: public: StmtExprEvaluation(CodeGenFunction &CGF) : CGF(CGF), SavedOutermostConditional(CGF.OutermostConditional) { - CGF.OutermostConditional = 0; + CGF.OutermostConditional = nullptr; } ~StmtExprEvaluation() { @@ -660,7 +691,7 @@ public: friend class CodeGenFunction; public: - PeepholeProtection() : Inst(0) {} + PeepholeProtection() : Inst(nullptr) {} }; /// A non-RAII class containing all the information about a bound @@ -678,7 +709,7 @@ public: bool boundLValue) : OpaqueValue(ov), BoundLValue(boundLValue) {} public: - OpaqueValueMappingData() : OpaqueValue(0) {} + OpaqueValueMappingData() : OpaqueValue(nullptr) {} static bool shouldBindAsLValue(const Expr *expr) { // gl-values should be bound as l-values for obvious reasons. @@ -687,8 +718,8 @@ public: // act exactly like l-values but are formally required to be // r-values in C. return expr->isGLValue() || - expr->getType()->isRecordType() || - expr->getType()->isFunctionType(); + expr->getType()->isFunctionType() || + hasAggregateEvaluationKind(expr->getType()); } static OpaqueValueMappingData bind(CodeGenFunction &CGF, @@ -723,8 +754,8 @@ public: return data; } - bool isValid() const { return OpaqueValue != 0; } - void clear() { OpaqueValue = 0; } + bool isValid() const { return OpaqueValue != nullptr; } + void clear() { OpaqueValue = nullptr; } void unbind(CodeGenFunction &CGF) { assert(OpaqueValue && "no data to unbind!"); @@ -827,9 +858,21 @@ private: }; SmallVector<BreakContinue, 8> BreakContinueStack; + CodeGenPGO PGO; + +public: + /// Get a counter for instrumentation of the region associated with the given + /// statement. + RegionCounter getPGORegionCounter(const Stmt *S) { + return RegionCounter(PGO, S); + } +private: + /// SwitchInsn - This is nearest current switch instruction. It is null if /// current context is not in a switch. llvm::SwitchInst *SwitchInsn; + /// The branch weights of SwitchInsn when doing instrumentation based PGO. + SmallVector<uint64_t, 16> *SwitchWeights; /// CaseRangeBlock - This block holds if condition check for last case /// statement range in current switch instruction. 
@@ -955,7 +998,7 @@ public: ASTContext &getContext() const { return CGM.getContext(); } CGDebugInfo *getDebugInfo() { if (DisableDebugInfo) - return NULL; + return nullptr; return DebugInfo; } void disableDebugInfo() { DisableDebugInfo = true; } @@ -988,7 +1031,7 @@ public: } llvm::BasicBlock *getInvokeDest() { - if (!EHStack.requiresLandingPad()) return 0; + if (!EHStack.requiresLandingPad()) return nullptr; return getInvokeDestImpl(); } @@ -1019,6 +1062,7 @@ public: void pushLifetimeExtendedDestroy(CleanupKind kind, llvm::Value *addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray); + void pushStackRestore(CleanupKind kind, llvm::Value *SPMem); void emitDestroy(llvm::Value *addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray); llvm::Function *generateDestroyHelper(llvm::Constant *addr, QualType type, @@ -1126,17 +1170,22 @@ public: void GenerateCode(GlobalDecl GD, llvm::Function *Fn, const CGFunctionInfo &FnInfo); + /// \brief Emit code for the start of a function. + /// \param Loc The location to be associated with the function. + /// \param StartLoc The location of the function body. void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, - SourceLocation StartLoc); + SourceLocation Loc = SourceLocation(), + SourceLocation StartLoc = SourceLocation()); void EmitConstructorBody(FunctionArgList &Args); void EmitDestructorBody(FunctionArgList &Args); void emitImplicitAssignmentOperatorBody(FunctionArgList &Args); void EmitFunctionBody(FunctionArgList &Args, const Stmt *Body); + void EmitBlockWithFallThrough(llvm::BasicBlock *BB, RegionCounter &Cnt); void EmitForwardingCallToLambda(const CXXMethodDecl *LambdaCallOperator, CallArgList &CallArgs); @@ -1270,8 +1319,8 @@ public: /// createBasicBlock - Create an LLVM basic block. llvm::BasicBlock *createBasicBlock(const Twine &name = "", - llvm::Function *parent = 0, - llvm::BasicBlock *before = 0) { + llvm::Function *parent = nullptr, + llvm::BasicBlock *before = nullptr) { #ifdef NDEBUG return llvm::BasicBlock::Create(getLLVMContext(), "", parent, before); #else @@ -1315,7 +1364,7 @@ public: /// HaveInsertPoint - True if an insertion point is defined. If not, this /// indicates that the current code being emitted is unreachable. bool HaveInsertPoint() const { - return Builder.GetInsertBlock() != 0; + return Builder.GetInsertBlock() != nullptr; } /// EnsureInsertPoint - Ensure that an insertion point is defined so that @@ -1380,6 +1429,10 @@ public: AggValueSlot::IsNotAliased); } + /// CreateInAllocaTmp - Create a temporary memory object for the given + /// aggregate type. + AggValueSlot CreateInAllocaTmp(QualType T, const Twine &Name = "inalloca"); + /// Emit a cast to void* in the appropriate address space. 
llvm::Value *EmitCastToVoidPtr(llvm::Value *value); @@ -1614,7 +1667,8 @@ public: llvm::Value *This); void EmitNewArrayInitializer(const CXXNewExpr *E, QualType elementType, - llvm::Value *NewPtr, llvm::Value *NumElements); + llvm::Value *NewPtr, llvm::Value *NumElements, + llvm::Value *AllocSizeWithoutCookie); void EmitCXXTemporary(const CXXTemporary *Temporary, QualType TempType, llvm::Value *Ptr); @@ -1625,6 +1679,9 @@ public: void EmitDeleteCall(const FunctionDecl *DeleteFD, llvm::Value *Ptr, QualType DeleteTy); + RValue EmitBuiltinNewDeleteCall(const FunctionProtoType *Type, + const Expr *Arg, bool IsDelete); + llvm::Value* EmitCXXTypeidExpr(const CXXTypeidExpr *E); llvm::Value *EmitDynamicCast(llvm::Value *V, const CXXDynamicCastExpr *DCE); llvm::Value* EmitCXXUuidofExpr(const CXXUuidofExpr *E); @@ -1656,6 +1713,10 @@ public: TCK_DowncastReference }; + /// \brief Whether any type-checking sanitizers are enabled. If \c false, + /// calls to EmitTypeCheck can be skipped. + bool sanitizePerformTypeCheck() const; + /// \brief Emit a check that \p V is the address of storage of the /// appropriate size and alignment for an object of type \p Type. void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *V, @@ -1722,19 +1783,21 @@ public: llvm::Value *SizeForLifetimeMarkers; struct Invalid {}; - AutoVarEmission(Invalid) : Variable(0) {} + AutoVarEmission(Invalid) : Variable(nullptr) {} AutoVarEmission(const VarDecl &variable) - : Variable(&variable), Address(0), NRVOFlag(0), + : Variable(&variable), Address(nullptr), NRVOFlag(nullptr), IsByRef(false), IsConstantAggregate(false), - SizeForLifetimeMarkers(0) {} + SizeForLifetimeMarkers(nullptr) {} - bool wasEmittedAsGlobal() const { return Address == 0; } + bool wasEmittedAsGlobal() const { return Address == nullptr; } public: static AutoVarEmission invalid() { return AutoVarEmission(Invalid()); } - bool useLifetimeMarkers() const { return SizeForLifetimeMarkers != 0; } + bool useLifetimeMarkers() const { + return SizeForLifetimeMarkers != nullptr; + } llvm::Value *getSizeForLifetimeMarkers() const { assert(useLifetimeMarkers()); return SizeForLifetimeMarkers; @@ -1767,7 +1830,8 @@ public: llvm::GlobalValue::LinkageTypes Linkage); /// EmitParmDecl - Emit a ParmVarDecl or an ImplicitParamDecl. 
- void EmitParmDecl(const VarDecl &D, llvm::Value *Arg, unsigned ArgNo); + void EmitParmDecl(const VarDecl &D, llvm::Value *Arg, bool ArgIsPointer, + unsigned ArgNo); /// protectFromPeepholes - Protect a value that we're intending to /// store to the side, but which will probably be used later, from @@ -1820,9 +1884,14 @@ public: void EmitGotoStmt(const GotoStmt &S); void EmitIndirectGotoStmt(const IndirectGotoStmt &S); void EmitIfStmt(const IfStmt &S); - void EmitWhileStmt(const WhileStmt &S); - void EmitDoStmt(const DoStmt &S); - void EmitForStmt(const ForStmt &S); + + void EmitCondBrHints(llvm::LLVMContext &Context, llvm::BranchInst *CondBr, + const ArrayRef<const Attr *> &Attrs); + void EmitWhileStmt(const WhileStmt &S, + const ArrayRef<const Attr *> &Attrs = None); + void EmitDoStmt(const DoStmt &S, const ArrayRef<const Attr *> &Attrs = None); + void EmitForStmt(const ForStmt &S, + const ArrayRef<const Attr *> &Attrs = None); void EmitReturnStmt(const ReturnStmt &S); void EmitDeclStmt(const DeclStmt &S); void EmitBreakStmt(const BreakStmt &S); @@ -1839,19 +1908,34 @@ public: void EmitObjCAtSynchronizedStmt(const ObjCAtSynchronizedStmt &S); void EmitObjCAutoreleasePoolStmt(const ObjCAutoreleasePoolStmt &S); - llvm::Constant *getUnwindResumeFn(); - llvm::Constant *getUnwindResumeOrRethrowFn(); void EnterCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock = false); void ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock = false); void EmitCXXTryStmt(const CXXTryStmt &S); void EmitSEHTryStmt(const SEHTryStmt &S); - void EmitCXXForRangeStmt(const CXXForRangeStmt &S); + void EmitSEHLeaveStmt(const SEHLeaveStmt &S); + void EmitCXXForRangeStmt(const CXXForRangeStmt &S, + const ArrayRef<const Attr *> &Attrs = None); llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K); - llvm::Function *GenerateCapturedStmtFunction(const CapturedDecl *CD, - const RecordDecl *RD, - SourceLocation Loc); + llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S); + llvm::Value *GenerateCapturedStmtArgument(const CapturedStmt &S); + + void EmitOMPParallelDirective(const OMPParallelDirective &S); + void EmitOMPSimdDirective(const OMPSimdDirective &S); + void EmitOMPForDirective(const OMPForDirective &S); + void EmitOMPSectionsDirective(const OMPSectionsDirective &S); + void EmitOMPSectionDirective(const OMPSectionDirective &S); + void EmitOMPSingleDirective(const OMPSingleDirective &S); + void EmitOMPMasterDirective(const OMPMasterDirective &S); + void EmitOMPCriticalDirective(const OMPCriticalDirective &S); + void EmitOMPParallelForDirective(const OMPParallelForDirective &S); + void EmitOMPParallelSectionsDirective(const OMPParallelSectionsDirective &S); + void EmitOMPTaskDirective(const OMPTaskDirective &S); + void EmitOMPTaskyieldDirective(const OMPTaskyieldDirective &S); + void EmitOMPBarrierDirective(const OMPBarrierDirective &S); + void EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S); + void EmitOMPFlushDirective(const OMPFlushDirective &S); //===--------------------------------------------------------------------===// // LValue Expression Emission @@ -1918,7 +2002,7 @@ public: llvm::Value *EmitLoadOfScalar(llvm::Value *Addr, bool Volatile, unsigned Alignment, QualType Ty, SourceLocation Loc, - llvm::MDNode *TBAAInfo = 0, + llvm::MDNode *TBAAInfo = nullptr, QualType TBAABaseTy = QualType(), uint64_t TBAAOffset = 0); @@ -1933,7 +2017,7 @@ public: /// the LLVM value representation. 
void EmitStoreOfScalar(llvm::Value *Value, llvm::Value *Addr, bool Volatile, unsigned Alignment, QualType Ty, - llvm::MDNode *TBAAInfo = 0, bool isInit = false, + llvm::MDNode *TBAAInfo = nullptr, bool isInit = false, QualType TBAABaseTy = QualType(), uint64_t TBAAOffset = 0); @@ -1950,12 +2034,14 @@ public: RValue EmitLoadOfLValue(LValue V, SourceLocation Loc); RValue EmitLoadOfExtVectorElementLValue(LValue V); RValue EmitLoadOfBitfieldLValue(LValue LV); + RValue EmitLoadOfGlobalRegLValue(LValue LV); /// EmitStoreThroughLValue - Store the specified rvalue into the specified /// lvalue, where both are guaranteed to the have the same type, and that type /// is 'Ty'. void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false); void EmitStoreThroughExtVectorComponentLValue(RValue Src, LValue Dst); + void EmitStoreThroughGlobalRegLValue(RValue Src, LValue Dst); /// EmitStoreThroughBitfieldLValue - Store Src into Dst with same constraints /// as EmitStoreThroughLValue. @@ -1964,7 +2050,7 @@ public: /// bit-field contents after the store, appropriate for use as the result of /// an assignment to the bit-field. void EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, - llvm::Value **Result=0); + llvm::Value **Result=nullptr); /// Emit an l-value for an assignment (simple or compound) of complex type. LValue EmitComplexAssignmentLValue(const BinaryOperator *E); @@ -1980,6 +2066,7 @@ public: // Note: only available for agg return types LValue EmitVAArgExprLValue(const VAArgExpr *E); LValue EmitDeclRefLValue(const DeclRefExpr *E); + LValue EmitReadRegister(const VarDecl *VD); LValue EmitStringLiteralLValue(const StringLiteral *E); LValue EmitObjCEncodeExprLValue(const ObjCEncodeExpr *E); LValue EmitPredefinedLValue(const PredefinedExpr *E); @@ -2011,7 +2098,9 @@ public: return ConstantEmission(C, false); } - LLVM_EXPLICIT operator bool() const { return ValueAndIsReference.getOpaqueValue() != 0; } + LLVM_EXPLICIT operator bool() const { + return ValueAndIsReference.getOpaqueValue() != nullptr; + } bool isReference() const { return ValueAndIsReference.getInt(); } LValue getReferenceLValue(CodeGenFunction &CGF, Expr *refExpr) const { @@ -2074,15 +2163,15 @@ public: llvm::Value *Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, - const Decl *TargetDecl = 0, - llvm::Instruction **callOrInvoke = 0); + const Decl *TargetDecl = nullptr, + llvm::Instruction **callOrInvoke = nullptr); RValue EmitCall(QualType FnType, llvm::Value *Callee, SourceLocation CallLoc, ReturnValueSlot ReturnValue, CallExpr::const_arg_iterator ArgBeg, CallExpr::const_arg_iterator ArgEnd, - const Decl *TargetDecl = 0); + const Decl *TargetDecl = nullptr); RValue EmitCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue = ReturnValueSlot()); @@ -2156,9 +2245,19 @@ public: const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name = ""); - llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty); - llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + + llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID, + unsigned LLVMIntrinsic, + unsigned AltLLVMIntrinsic, + const char *NameHint, + unsigned Modifier, + const CallExpr *E, + SmallVectorImpl<llvm::Value *> &Ops, + llvm::Value *Align = nullptr); + llvm::Function *LookupNeonLLVMIntrinsic(unsigned IntrinsicID, + unsigned Modifier, llvm::Type *ArgTy, + const CallExpr *E); llvm::Value 
*EmitNeonCall(llvm::Function *F, SmallVectorImpl<llvm::Value*> &O, const char *name, @@ -2168,10 +2267,22 @@ public: bool negateForRightShift); llvm::Value *EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name); + // Helper functions for EmitAArch64BuiltinExpr. + llvm::Value *vectorWrapScalar8(llvm::Value *Op); + llvm::Value *vectorWrapScalar16(llvm::Value *Op); + llvm::Value *emitVectorWrappedScalar8Intrinsic( + unsigned Int, SmallVectorImpl<llvm::Value *> &Ops, const char *Name); + llvm::Value *emitVectorWrappedScalar16Intrinsic( + unsigned Int, SmallVectorImpl<llvm::Value *> &Ops, const char *Name); + llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitNeon64Call(llvm::Function *F, + llvm::SmallVectorImpl<llvm::Value *> &O, + const char *name); llvm::Value *BuildVector(ArrayRef<llvm::Value*> Ops); llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitR600BuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitObjCProtocolExpr(const ObjCProtocolExpr *E); llvm::Value *EmitObjCStringLiteral(const ObjCStringLiteral *E); @@ -2231,7 +2342,7 @@ public: llvm::Value *EmitARCRetainScalarExpr(const Expr *expr); llvm::Value *EmitARCRetainAutoreleaseScalarExpr(const Expr *expr); - void EmitARCIntrinsicUse(llvm::ArrayRef<llvm::Value*> values); + void EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values); static Destroyer destroyARCStrongImprecise; static Destroyer destroyARCStrongPrecise; @@ -2304,9 +2415,9 @@ public: /// CreateStaticVarDecl - Create a zero-initialized LLVM global for /// a static local variable. - llvm::GlobalVariable *CreateStaticVarDecl(const VarDecl &D, - const char *Separator, - llvm::GlobalValue::LinkageTypes Linkage); + llvm::Constant *CreateStaticVarDecl(const VarDecl &D, + const char *Separator, + llvm::GlobalValue::LinkageTypes Linkage); /// AddInitializerToStaticVarDecl - Add the initializer for 'D' to the /// global variable that has already been created for it. If the initializer @@ -2339,7 +2450,7 @@ public: /// variables. void GenerateCXXGlobalInitFunc(llvm::Function *Fn, ArrayRef<llvm::Constant *> Decls, - llvm::GlobalVariable *Guard = 0); + llvm::GlobalVariable *Guard = nullptr); /// GenerateCXXGlobalDtorsFunc - Generates code for destroying global /// variables. @@ -2367,7 +2478,7 @@ public: void EmitLambdaExpr(const LambdaExpr *E, AggValueSlot Dest); - RValue EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest = 0); + RValue EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest = nullptr); //===--------------------------------------------------------------------===// // Annotations Emission @@ -2413,8 +2524,10 @@ public: /// EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g. for an /// if statement) to the specified blocks. Based on the condition, this might /// try to simplify the codegen of the conditional based on the branch. + /// TrueCount should be the number of times we expect the condition to + /// evaluate to true based on PGO data. void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, - llvm::BasicBlock *FalseBlock); + llvm::BasicBlock *FalseBlock, uint64_t TrueCount); /// \brief Emit a description of a type in a format suitable for passing to /// a runtime sanitizer handler. 
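// Illustrative sketch (not clang's implementation; names are hypothetical):
// EmitBranchOnBoolExpr above now carries TrueCount, the PGO-observed number
// of times the condition was true. Branch-weight metadata wants small
// integers, so the arithmetic behind it has roughly this shape:
#include <algorithm>
#include <cstdint>
#include <utility>

// Assumes TrueCount <= TotalCount.
std::pair<uint32_t, uint32_t> scaleBranchWeights(uint64_t TrueCount,
                                                 uint64_t TotalCount) {
  uint64_t FalseCount = TotalCount - TrueCount;
  uint64_t Max = std::max(TrueCount, FalseCount);
  // Divide both counts by the same factor so each fits in 32 bits.
  uint64_t Scale = (Max > UINT32_MAX) ? (Max / UINT32_MAX + 1) : 1;
  return {static_cast<uint32_t>(TrueCount / Scale),
          static_cast<uint32_t>(FalseCount / Scale)};
}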
@@ -2467,6 +2580,11 @@ private: llvm::MDNode *getRangeForLoadFromType(QualType Ty); void EmitReturnOfRValue(RValue RV, QualType Ty); + void deferPlaceholderReplacement(llvm::Instruction *Old, llvm::Value *New); + + llvm::SmallVector<std::pair<llvm::Instruction *, llvm::Value *>, 4> + DeferredReplacements; + /// ExpandTypeFromArgs - Reconstruct a structure of type \arg Ty /// from function arguments into \arg Dst. See ABIArgInfo::Expand. /// @@ -2492,69 +2610,82 @@ private: std::string &ConstraintStr, SourceLocation Loc); +public: /// EmitCallArgs - Emit call arguments for a function. - /// The CallArgTypeInfo parameter is used for iterating over the known - /// argument types of the function being called. - template<typename T> - void EmitCallArgs(CallArgList& Args, const T* CallArgTypeInfo, + template <typename T> + void EmitCallArgs(CallArgList &Args, const T *CallArgTypeInfo, CallExpr::const_arg_iterator ArgBeg, CallExpr::const_arg_iterator ArgEnd, bool ForceColumnInfo = false) { - CGDebugInfo *DI = getDebugInfo(); - SourceLocation CallLoc; - if (DI) CallLoc = DI->getLocation(); + if (CallArgTypeInfo) { + EmitCallArgs(Args, CallArgTypeInfo->isVariadic(), + CallArgTypeInfo->param_type_begin(), + CallArgTypeInfo->param_type_end(), ArgBeg, ArgEnd, + ForceColumnInfo); + } else { + // T::param_type_iterator might not have a default ctor. + const QualType *NoIter = nullptr; + EmitCallArgs(Args, /*AllowExtraArguments=*/true, NoIter, NoIter, ArgBeg, + ArgEnd, ForceColumnInfo); + } + } + template<typename ArgTypeIterator> + void EmitCallArgs(CallArgList& Args, + bool AllowExtraArguments, + ArgTypeIterator ArgTypeBeg, + ArgTypeIterator ArgTypeEnd, + CallExpr::const_arg_iterator ArgBeg, + CallExpr::const_arg_iterator ArgEnd, + bool ForceColumnInfo = false) { + SmallVector<QualType, 16> ArgTypes; CallExpr::const_arg_iterator Arg = ArgBeg; // First, use the argument types that the type info knows about - if (CallArgTypeInfo) { - for (typename T::arg_type_iterator I = CallArgTypeInfo->arg_type_begin(), - E = CallArgTypeInfo->arg_type_end(); I != E; ++I, ++Arg) { - assert(Arg != ArgEnd && "Running over edge of argument list!"); - QualType ArgType = *I; + for (ArgTypeIterator I = ArgTypeBeg, E = ArgTypeEnd; I != E; ++I, ++Arg) { + assert(Arg != ArgEnd && "Running over edge of argument list!"); #ifndef NDEBUG - QualType ActualArgType = Arg->getType(); - if (ArgType->isPointerType() && ActualArgType->isPointerType()) { - QualType ActualBaseType = + QualType ArgType = *I; + QualType ActualArgType = Arg->getType(); + if (ArgType->isPointerType() && ActualArgType->isPointerType()) { + QualType ActualBaseType = ActualArgType->getAs<PointerType>()->getPointeeType(); - QualType ArgBaseType = + QualType ArgBaseType = ArgType->getAs<PointerType>()->getPointeeType(); - if (ArgBaseType->isVariableArrayType()) { - if (const VariableArrayType *VAT = - getContext().getAsVariableArrayType(ActualBaseType)) { - if (!VAT->getSizeExpr()) - ActualArgType = ArgType; - } + if (ArgBaseType->isVariableArrayType()) { + if (const VariableArrayType *VAT = + getContext().getAsVariableArrayType(ActualBaseType)) { + if (!VAT->getSizeExpr()) + ActualArgType = ArgType; } } - assert(getContext().getCanonicalType(ArgType.getNonReferenceType()). - getTypePtr() == - getContext().getCanonicalType(ActualArgType).getTypePtr() && - "type mismatch in call argument!"); -#endif - EmitCallArg(Args, *Arg, ArgType); - - // Each argument expression could modify the debug - // location. Restore it. 
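// Illustrative sketch of the dispatch pattern in the EmitCallArgs hunk above
// (standalone code, not clang's): when the callee has no prototype, a plain
// pointer stands in as an empty iterator range, because the real
// param_type_iterator may not be default-constructible.
#include <cassert>
#include <vector>

struct Proto {
  std::vector<int> ParamTypes;
  bool Variadic = false;
  const int *param_type_begin() const { return ParamTypes.data(); }
  const int *param_type_end() const {
    return ParamTypes.data() + ParamTypes.size();
  }
};

template <typename It>
void emitArgs(bool AllowExtraArgs, It TypeBeg, It TypeEnd, unsigned NumArgs) {
  unsigned Known = 0;
  for (It I = TypeBeg; I != TypeEnd; ++I)
    ++Known;
  assert((NumArgs <= Known || AllowExtraArgs) &&
         "Extra arguments in non-variadic function!");
  (void)NumArgs;
  (void)Known;
}

void emitArgs(const Proto *P, unsigned NumArgs) {
  if (P)
    return emitArgs(P->Variadic, P->param_type_begin(), P->param_type_end(),
                    NumArgs);
  const int *NoIter = nullptr; // empty-range stand-in, as in the hunk above
  emitArgs(/*AllowExtraArgs=*/true, NoIter, NoIter, NumArgs);
}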
- if (DI) DI->EmitLocation(Builder, CallLoc, ForceColumnInfo); } - - // Either we've emitted all the call args, or we have a call to a - // variadic function. - assert((Arg == ArgEnd || CallArgTypeInfo->isVariadic()) && - "Extra arguments in non-variadic function!"); - + assert(getContext().getCanonicalType(ArgType.getNonReferenceType()). + getTypePtr() == + getContext().getCanonicalType(ActualArgType).getTypePtr() && + "type mismatch in call argument!"); +#endif + ArgTypes.push_back(*I); } + // Either we've emitted all the call args, or we have a call to variadic + // function or some other call that allows extra arguments. + assert((Arg == ArgEnd || AllowExtraArguments) && + "Extra arguments in non-variadic function!"); + // If we still have any arguments, emit them using the type of the argument. - for (; Arg != ArgEnd; ++Arg) { - EmitCallArg(Args, *Arg, Arg->getType()); + for (; Arg != ArgEnd; ++Arg) + ArgTypes.push_back(Arg->getType()); - // Restore the debug location. - if (DI) DI->EmitLocation(Builder, CallLoc, ForceColumnInfo); - } + EmitCallArgs(Args, ArgTypes, ArgBeg, ArgEnd, ForceColumnInfo); } + void EmitCallArgs(CallArgList &Args, ArrayRef<QualType> ArgTypes, + CallExpr::const_arg_iterator ArgBeg, + CallExpr::const_arg_iterator ArgEnd, + bool ForceColumnInfo = false); + +private: const TargetCodeGenInfo &getTargetHooks() const { return CGM.getTargetCodeGenInfo(); } diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 792fbfce3349..48823befcc07 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -18,7 +18,9 @@ #include "CGDebugInfo.h" #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" +#include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" +#include "CodeGenPGO.h" #include "CodeGenTBAA.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" @@ -40,30 +42,31 @@ #include "clang/Sema/SemaDiagnostic.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/Triple.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/Support/CallSite.h" +#include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/Mangler.h" using namespace clang; using namespace CodeGen; static const char AnnotationSection[] = "llvm.metadata"; -static CGCXXABI &createCXXABI(CodeGenModule &CGM) { +static CGCXXABI *createCXXABI(CodeGenModule &CGM) { switch (CGM.getTarget().getCXXABI().getKind()) { case TargetCXXABI::GenericAArch64: case TargetCXXABI::GenericARM: case TargetCXXABI::iOS: + case TargetCXXABI::iOS64: case TargetCXXABI::GenericItanium: - return *CreateItaniumCXXABI(CGM); + return CreateItaniumCXXABI(CGM); case TargetCXXABI::Microsoft: - return *CreateMicrosoftCXXABI(CGM); + return CreateMicrosoftCXXABI(CGM); } llvm_unreachable("invalid C++ ABI kind"); @@ -74,18 +77,18 @@ CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO, DiagnosticsEngine &diags) : Context(C), LangOpts(C.getLangOpts()), CodeGenOpts(CGO), TheModule(M), Diags(diags), TheDataLayout(TD), Target(C.getTargetInfo()), - ABI(createCXXABI(*this)), VMContext(M.getContext()), TBAA(0), - TheTargetCodeGenInfo(0), Types(*this), VTables(*this), ObjCRuntime(0), - OpenCLRuntime(0), CUDARuntime(0), DebugInfo(0), ARCData(0), - NoObjCARCExceptionsMetadata(0), RRData(0), CFConstantStringClassRef(0), - ConstantStringClassRef(0), 
NSConstantStringType(0), - NSConcreteGlobalBlock(0), NSConcreteStackBlock(0), BlockObjectAssign(0), - BlockObjectDispose(0), BlockDescriptorType(0), GenericBlockLiteralType(0), - LifetimeStartFn(0), LifetimeEndFn(0), - SanitizerBlacklist( - llvm::SpecialCaseList::createOrDie(CGO.SanitizerBlacklistFile)), - SanOpts(SanitizerBlacklist->isIn(M) ? SanitizerOptions::Disabled - : LangOpts.Sanitize) { + ABI(createCXXABI(*this)), VMContext(M.getContext()), TBAA(nullptr), + TheTargetCodeGenInfo(nullptr), Types(*this), VTables(*this), + ObjCRuntime(nullptr), OpenCLRuntime(nullptr), OpenMPRuntime(nullptr), + CUDARuntime(nullptr), DebugInfo(nullptr), ARCData(nullptr), + NoObjCARCExceptionsMetadata(nullptr), RRData(nullptr), PGOReader(nullptr), + CFConstantStringClassRef(nullptr), ConstantStringClassRef(nullptr), + NSConstantStringType(nullptr), NSConcreteGlobalBlock(nullptr), + NSConcreteStackBlock(nullptr), BlockObjectAssign(nullptr), + BlockObjectDispose(nullptr), BlockDescriptorType(nullptr), + GenericBlockLiteralType(nullptr), LifetimeStartFn(nullptr), + LifetimeEndFn(nullptr), SanitizerBL(llvm::SpecialCaseList::createOrDie( + CGO.SanitizerBlacklistFile)) { // Initialize the type cache. llvm::LLVMContext &LLVMContext = M.getContext(); @@ -110,14 +113,16 @@ CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO, createObjCRuntime(); if (LangOpts.OpenCL) createOpenCLRuntime(); + if (LangOpts.OpenMP) + createOpenMPRuntime(); if (LangOpts.CUDA) createCUDARuntime(); // Enable TBAA unless it's suppressed. ThreadSanitizer needs TBAA even at O0. - if (SanOpts.Thread || + if (LangOpts.Sanitize.Thread || (!CodeGenOpts.RelaxedAliasing && CodeGenOpts.OptimizationLevel > 0)) TBAA = new CodeGenTBAA(Context, VMContext, CodeGenOpts, getLangOpts(), - ABI.getMangleContext()); + getCXXABI().getMangleContext()); // If debug info or coverage generation is enabled, create the CGDebugInfo // object. 
@@ -131,14 +136,23 @@ CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO, if (C.getLangOpts().ObjCAutoRefCount) ARCData = new ARCEntrypoints(); RRData = new RREntrypoints(); + + if (!CodeGenOpts.InstrProfileInput.empty()) { + if (std::error_code EC = llvm::IndexedInstrProfReader::create( + CodeGenOpts.InstrProfileInput, PGOReader)) { + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "Could not read profile: %0"); + getDiags().Report(DiagID) << EC.message(); + } + } } CodeGenModule::~CodeGenModule() { delete ObjCRuntime; delete OpenCLRuntime; + delete OpenMPRuntime; delete CUDARuntime; delete TheTargetCodeGenInfo; - delete &ABI; delete TBAA; delete DebugInfo; delete ARCData; @@ -168,6 +182,10 @@ void CodeGenModule::createOpenCLRuntime() { OpenCLRuntime = new CGOpenCLRuntime(*this); } +void CodeGenModule::createOpenMPRuntime() { + OpenMPRuntime = new CGOpenMPRuntime(*this); +} + void CodeGenModule::createCUDARuntime() { CUDARuntime = CreateNVCUDARuntime(*this); } @@ -181,13 +199,17 @@ void CodeGenModule::applyReplacements() { llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (!Entry) continue; - llvm::Function *OldF = cast<llvm::Function>(Entry); - llvm::Function *NewF = dyn_cast<llvm::Function>(Replacement); + auto *OldF = cast<llvm::Function>(Entry); + auto *NewF = dyn_cast<llvm::Function>(Replacement); if (!NewF) { - llvm::ConstantExpr *CE = cast<llvm::ConstantExpr>(Replacement); - assert(CE->getOpcode() == llvm::Instruction::BitCast || - CE->getOpcode() == llvm::Instruction::GetElementPtr); - NewF = dyn_cast<llvm::Function>(CE->getOperand(0)); + if (auto *Alias = dyn_cast<llvm::GlobalAlias>(Replacement)) { + NewF = dyn_cast<llvm::Function>(Alias->getAliasee()); + } else { + auto *CE = cast<llvm::ConstantExpr>(Replacement); + assert(CE->getOpcode() == llvm::Instruction::BitCast || + CE->getOpcode() == llvm::Instruction::GetElementPtr); + NewF = dyn_cast<llvm::Function>(CE->getOperand(0)); + } } // Replace old with new, but keep the old order. @@ -200,23 +222,75 @@ void CodeGenModule::applyReplacements() { } } +// This is only used in aliases that we created and we know they have a +// linear structure. +static const llvm::GlobalObject *getAliasedGlobal(const llvm::GlobalAlias &GA) { + llvm::SmallPtrSet<const llvm::GlobalAlias*, 4> Visited; + const llvm::Constant *C = &GA; + for (;;) { + C = C->stripPointerCasts(); + if (auto *GO = dyn_cast<llvm::GlobalObject>(C)) + return GO; + // stripPointerCasts will not walk over weak aliases. + auto *GA2 = dyn_cast<llvm::GlobalAlias>(C); + if (!GA2) + return nullptr; + if (!Visited.insert(GA2)) + return nullptr; + C = GA2->getAliasee(); + } +} + void CodeGenModule::checkAliases() { + // Check if the constructed aliases are well formed. It is really unfortunate + // that we have to do this in CodeGen, but we only construct mangled names + // and aliases during codegen. 
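// Illustrative sketch (toy types, not llvm::GlobalAlias): getAliasedGlobal
// above walks an alias chain while guarding against cycles with a visited
// set; the same shape in standalone form:
#include <set>

struct Node {
  const Node *Aliasee = nullptr; // non-null means "this node is an alias"
};

// Returns the object at the end of the chain, or nullptr if the chain loops.
const Node *resolveAliasChain(const Node *N) {
  std::set<const Node *> Visited;
  while (N && N->Aliasee) {
    if (!Visited.insert(N).second)
      return nullptr; // cycle detected
    N = N->Aliasee;
  }
  return N;
}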
bool Error = false; + DiagnosticsEngine &Diags = getDiags(); for (std::vector<GlobalDecl>::iterator I = Aliases.begin(), E = Aliases.end(); I != E; ++I) { const GlobalDecl &GD = *I; - const ValueDecl *D = cast<ValueDecl>(GD.getDecl()); + const auto *D = cast<ValueDecl>(GD.getDecl()); const AliasAttr *AA = D->getAttr<AliasAttr>(); StringRef MangledName = getMangledName(GD); llvm::GlobalValue *Entry = GetGlobalValue(MangledName); - llvm::GlobalAlias *Alias = cast<llvm::GlobalAlias>(Entry); - llvm::GlobalValue *GV = Alias->getAliasedGlobal(); - if (GV->isDeclaration()) { + auto *Alias = cast<llvm::GlobalAlias>(Entry); + const llvm::GlobalValue *GV = getAliasedGlobal(*Alias); + if (!GV) { Error = true; - getDiags().Report(AA->getLocation(), diag::err_alias_to_undefined); - } else if (!Alias->resolveAliasedGlobal(/*stopOnWeak*/ false)) { + Diags.Report(AA->getLocation(), diag::err_cyclic_alias); + } else if (GV->isDeclaration()) { Error = true; - getDiags().Report(AA->getLocation(), diag::err_cyclic_alias); + Diags.Report(AA->getLocation(), diag::err_alias_to_undefined); + } + + llvm::Constant *Aliasee = Alias->getAliasee(); + llvm::GlobalValue *AliaseeGV; + if (auto CE = dyn_cast<llvm::ConstantExpr>(Aliasee)) + AliaseeGV = cast<llvm::GlobalValue>(CE->getOperand(0)); + else + AliaseeGV = cast<llvm::GlobalValue>(Aliasee); + + if (const SectionAttr *SA = D->getAttr<SectionAttr>()) { + StringRef AliasSection = SA->getName(); + if (AliasSection != AliaseeGV->getSection()) + Diags.Report(SA->getLocation(), diag::warn_alias_with_section) + << AliasSection; + } + + // We have to handle alias to weak aliases in here. LLVM itself disallows + // this since the object semantics would not match the IL one. For + // compatibility with gcc we implement it by just pointing the alias + // to its aliasee's aliasee. We also warn, since the user is probably + // expecting the link to be weak. 
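// Illustrative example (GNU attribute spelling, not part of this change):
// inputs of the kind the new checkAliases diagnostics cover; extern "C"
// keeps the alias target names unmangled.
extern "C" {

void impl(void) {}

// Alias to a weak alias: kept for GCC compatibility, but now warned about
// (warn_alias_to_weak_alias) and redirected to point at impl() directly.
void weak_mid(void) __attribute__((weak, alias("impl")));
void outer(void) __attribute__((alias("weak_mid")));

// Alias whose target is never defined in this TU: err_alias_to_undefined.
void missing(void);
void broken(void) __attribute__((alias("missing")));

} // extern "C"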
+ if (auto GA = dyn_cast<llvm::GlobalAlias>(AliaseeGV)) { + if (GA->mayBeOverridden()) { + Diags.Report(AA->getLocation(), diag::warn_alias_to_weak_alias) + << GV->getName() << GA->getName(); + Aliasee = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( + GA->getAliasee(), Alias->getType()); + Alias->setAliasee(Aliasee); + } } } if (!Error) @@ -227,12 +301,29 @@ void CodeGenModule::checkAliases() { const GlobalDecl &GD = *I; StringRef MangledName = getMangledName(GD); llvm::GlobalValue *Entry = GetGlobalValue(MangledName); - llvm::GlobalAlias *Alias = cast<llvm::GlobalAlias>(Entry); + auto *Alias = cast<llvm::GlobalAlias>(Entry); Alias->replaceAllUsesWith(llvm::UndefValue::get(Alias->getType())); Alias->eraseFromParent(); } } +void CodeGenModule::clear() { + DeferredDeclsToEmit.clear(); +} + +void InstrProfStats::reportDiagnostics(DiagnosticsEngine &Diags, + StringRef MainFile) { + if (!hasDiagnostics()) + return; + if (VisitedInMainFile > 0 && VisitedInMainFile == MissingInMainFile) { + if (MainFile.empty()) + MainFile = "<stdin>"; + Diags.Report(diag::warn_profile_data_unprofiled) << MainFile; + } else + Diags.Report(diag::warn_profile_data_out_of_date) << Visited << Missing + << Mismatched; +} + void CodeGenModule::Release() { EmitDeferred(); applyReplacements(); @@ -243,11 +334,16 @@ void CodeGenModule::Release() { if (ObjCRuntime) if (llvm::Function *ObjCInitFunction = ObjCRuntime->ModuleInitFunction()) AddGlobalCtor(ObjCInitFunction); + if (getCodeGenOpts().ProfileInstrGenerate) + if (llvm::Function *PGOInit = CodeGenPGO::emitInitialization(*this)) + AddGlobalCtor(PGOInit, 0); + if (PGOReader && PGOStats.hasDiagnostics()) + PGOStats.reportDiagnostics(getDiags(), getCodeGenOpts().MainFileName); EmitCtorList(GlobalCtors, "llvm.global_ctors"); EmitCtorList(GlobalDtors, "llvm.global_dtors"); EmitGlobalAnnotations(); EmitStaticExternCAliases(); - EmitLLVMUsed(); + emitLLVMUsed(); if (CodeGenOpts.Autolink && (Context.getLangOpts().Modules || !LinkerOptionsMetadata.empty())) { @@ -259,14 +355,29 @@ void CodeGenModule::Release() { getModule().addModuleFlag(llvm::Module::Warning, "Dwarf Version", CodeGenOpts.DwarfVersion); if (DebugInfo) - // We support a single version in the linked module: error out when - // modules do not have the same version. We are going to implement dropping - // debug info when the version number is not up-to-date. Once that is - // done, the bitcode linker is not going to see modules with different - // version numbers. - getModule().addModuleFlag(llvm::Module::Error, "Debug Info Version", + // We support a single version in the linked module. The LLVM + // parser will drop debug info with a different version number + // (and warn about it, too). + getModule().addModuleFlag(llvm::Module::Warning, "Debug Info Version", llvm::DEBUG_METADATA_VERSION); + // We need to record the widths of enums and wchar_t, so that we can generate + // the correct build attributes in the ARM backend. + llvm::Triple::ArchType Arch = Context.getTargetInfo().getTriple().getArch(); + if ( Arch == llvm::Triple::arm + || Arch == llvm::Triple::armeb + || Arch == llvm::Triple::thumb + || Arch == llvm::Triple::thumbeb) { + // Width of wchar_t in bytes + uint64_t WCharWidth = + Context.getTypeSizeInChars(Context.getWideCharType()).getQuantity(); + getModule().addModuleFlag(llvm::Module::Error, "wchar_size", WCharWidth); + + // The minimum width of an enum in bytes + uint64_t EnumWidth = Context.getLangOpts().ShortEnums ? 
1 : 4; + getModule().addModuleFlag(llvm::Module::Error, "min_enum_size", EnumWidth); + } + SimplifyPersonality(); if (getCodeGenOpts().EmitDeclMetadata) @@ -279,6 +390,8 @@ void CodeGenModule::Release() { DebugInfo->finalize(); EmitVersionIdentMetadata(); + + EmitTargetMetadata(); } void CodeGenModule::UpdateCompletedType(const TagDecl *TD) { @@ -288,25 +401,25 @@ void CodeGenModule::UpdateCompletedType(const TagDecl *TD) { llvm::MDNode *CodeGenModule::getTBAAInfo(QualType QTy) { if (!TBAA) - return 0; + return nullptr; return TBAA->getTBAAInfo(QTy); } llvm::MDNode *CodeGenModule::getTBAAInfoForVTablePtr() { if (!TBAA) - return 0; + return nullptr; return TBAA->getTBAAInfoForVTablePtr(); } llvm::MDNode *CodeGenModule::getTBAAStructInfo(QualType QTy) { if (!TBAA) - return 0; + return nullptr; return TBAA->getTBAAStructInfo(QTy); } llvm::MDNode *CodeGenModule::getTBAAStructTypeInfo(QualType QTy) { if (!TBAA) - return 0; + return nullptr; return TBAA->getTBAAStructTypeInfo(QTy); } @@ -314,7 +427,7 @@ llvm::MDNode *CodeGenModule::getTBAAStructTagInfo(QualType BaseTy, llvm::MDNode *AccessN, uint64_t O) { if (!TBAA) - return 0; + return nullptr; return TBAA->getTBAAStructTagInfo(BaseTy, AccessN, O); } @@ -332,9 +445,9 @@ void CodeGenModule::DecorateInstruction(llvm::Instruction *Inst, Inst->setMetadata(llvm::LLVMContext::MD_tbaa, TBAAInfo); } -void CodeGenModule::Error(SourceLocation loc, StringRef error) { - unsigned diagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error, error); - getDiags().Report(Context.getFullLoc(loc), diagID); +void CodeGenModule::Error(SourceLocation loc, StringRef message) { + unsigned diagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error, "%0"); + getDiags().Report(Context.getFullLoc(loc), diagID) << message; } /// ErrorUnsupported - Print out an error that codegen doesn't support the @@ -405,130 +518,61 @@ void CodeGenModule::setTLSMode(llvm::GlobalVariable *GV, TLM = GetLLVMTLSModel(CodeGenOpts.getDefaultTLSModel()); // Override the TLS model if it is explicitly specified. - if (D.hasAttr<TLSModelAttr>()) { - const TLSModelAttr *Attr = D.getAttr<TLSModelAttr>(); + if (const TLSModelAttr *Attr = D.getAttr<TLSModelAttr>()) { TLM = GetLLVMTLSModel(Attr->getModel()); } GV->setThreadLocalMode(TLM); } -/// Set the symbol visibility of type information (vtable and RTTI) -/// associated with the given type. -void CodeGenModule::setTypeVisibility(llvm::GlobalValue *GV, - const CXXRecordDecl *RD, - TypeVisibilityKind TVK) const { - setGlobalVisibility(GV, RD); - - if (!CodeGenOpts.HiddenWeakVTables) - return; - - // We never want to drop the visibility for RTTI names. - if (TVK == TVK_ForRTTIName) - return; - - // We want to drop the visibility to hidden for weak type symbols. - // This isn't possible if there might be unresolved references - // elsewhere that rely on this symbol being visible. - - // This should be kept roughly in sync with setThunkVisibility - // in CGVTables.cpp. - - // Preconditions. - if (GV->getLinkage() != llvm::GlobalVariable::LinkOnceODRLinkage || - GV->getVisibility() != llvm::GlobalVariable::DefaultVisibility) - return; - - // Don't override an explicit visibility attribute. - if (RD->getExplicitVisibility(NamedDecl::VisibilityForType)) - return; - - switch (RD->getTemplateSpecializationKind()) { - // We have to disable the optimization if this is an EI definition - // because there might be EI declarations in other shared objects. 
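// Illustrative example (not part of this change): the per-variable TLS-model
// override that setTLSMode above reads from TLSModelAttr, in source form,
// using one of the documented model names:
thread_local int request_count __attribute__((tls_model("initial-exec")));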
- case TSK_ExplicitInstantiationDefinition: - case TSK_ExplicitInstantiationDeclaration: - return; - - // Every use of a non-template class's type information has to emit it. - case TSK_Undeclared: - break; - - // In theory, implicit instantiations can ignore the possibility of - // an explicit instantiation declaration because there necessarily - // must be an EI definition somewhere with default visibility. In - // practice, it's possible to have an explicit instantiation for - // an arbitrary template class, and linkers aren't necessarily able - // to deal with mixed-visibility symbols. - case TSK_ExplicitSpecialization: - case TSK_ImplicitInstantiation: - return; - } - - // If there's a key function, there may be translation units - // that don't have the key function's definition. But ignore - // this if we're emitting RTTI under -fno-rtti. - if (!(TVK != TVK_ForRTTI) || LangOpts.RTTI) { - // FIXME: what should we do if we "lose" the key function during - // the emission of the file? - if (Context.getCurrentKeyFunction(RD)) - return; - } - - // Otherwise, drop the visibility to hidden. - GV->setVisibility(llvm::GlobalValue::HiddenVisibility); - GV->setUnnamedAddr(true); -} - StringRef CodeGenModule::getMangledName(GlobalDecl GD) { - const NamedDecl *ND = cast<NamedDecl>(GD.getDecl()); - - StringRef &Str = MangledDeclNames[GD.getCanonicalDecl()]; - if (!Str.empty()) - return Str; + StringRef &FoundStr = MangledDeclNames[GD.getCanonicalDecl()]; + if (!FoundStr.empty()) + return FoundStr; - if (!getCXXABI().getMangleContext().shouldMangleDeclName(ND)) { + const auto *ND = cast<NamedDecl>(GD.getDecl()); + SmallString<256> Buffer; + StringRef Str; + if (getCXXABI().getMangleContext().shouldMangleDeclName(ND)) { + llvm::raw_svector_ostream Out(Buffer); + if (const auto *D = dyn_cast<CXXConstructorDecl>(ND)) + getCXXABI().getMangleContext().mangleCXXCtor(D, GD.getCtorType(), Out); + else if (const auto *D = dyn_cast<CXXDestructorDecl>(ND)) + getCXXABI().getMangleContext().mangleCXXDtor(D, GD.getDtorType(), Out); + else + getCXXABI().getMangleContext().mangleName(ND, Out); + Str = Out.str(); + } else { IdentifierInfo *II = ND->getIdentifier(); assert(II && "Attempt to mangle unnamed decl."); - Str = II->getName(); - return Str; } - - SmallString<256> Buffer; - llvm::raw_svector_ostream Out(Buffer); - if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(ND)) - getCXXABI().getMangleContext().mangleCXXCtor(D, GD.getCtorType(), Out); - else if (const CXXDestructorDecl *D = dyn_cast<CXXDestructorDecl>(ND)) - getCXXABI().getMangleContext().mangleCXXDtor(D, GD.getDtorType(), Out); - else - getCXXABI().getMangleContext().mangleName(ND, Out); - // Allocate space for the mangled name. 
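// Illustrative sketch (standard containers, not clang's StringMap-based
// code): getMangledName above now uniques names through a map whose keys own
// the string storage, so the cached reference stays valid across lookups.
#include <map>
#include <string>

struct NameCache {
  std::map<std::string, unsigned> Storage; // mangled name -> decl id stand-in
  const std::string &getOrCreate(const std::string &Mangled, unsigned DeclId) {
    // std::map never invalidates references to existing elements.
    return Storage.emplace(Mangled, DeclId).first->first;
  }
};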
- Out.flush(); - size_t Length = Buffer.size(); - char *Name = MangledNamesAllocator.Allocate<char>(Length); - std::copy(Buffer.begin(), Buffer.end(), Name); - - Str = StringRef(Name, Length); - - return Str; + auto &Mangled = Manglings.GetOrCreateValue(Str); + Mangled.second = GD; + return FoundStr = Mangled.first(); } -void CodeGenModule::getBlockMangledName(GlobalDecl GD, MangleBuffer &Buffer, - const BlockDecl *BD) { +StringRef CodeGenModule::getBlockMangledName(GlobalDecl GD, + const BlockDecl *BD) { MangleContext &MangleCtx = getCXXABI().getMangleContext(); const Decl *D = GD.getDecl(); - llvm::raw_svector_ostream Out(Buffer.getBuffer()); - if (D == 0) + + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + if (!D) MangleCtx.mangleGlobalBlock(BD, dyn_cast_or_null<VarDecl>(initializedGlobalDecl.getDecl()), Out); - else if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(D)) + else if (const auto *CD = dyn_cast<CXXConstructorDecl>(D)) MangleCtx.mangleCtorBlock(CD, GD.getCtorType(), BD, Out); - else if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(D)) + else if (const auto *DD = dyn_cast<CXXDestructorDecl>(D)) MangleCtx.mangleDtorBlock(DD, GD.getDtorType(), BD, Out); else MangleCtx.mangleBlock(cast<DeclContext>(D), BD, Out); + + auto &Mangled = Manglings.GetOrCreateValue(Out.str()); + Mangled.second = BD; + return Mangled.first(); } llvm::GlobalValue *CodeGenModule::GetGlobalValue(StringRef Name) { @@ -537,16 +581,17 @@ llvm::GlobalValue *CodeGenModule::GetGlobalValue(StringRef Name) { /// AddGlobalCtor - Add a function to the list that will be called before /// main() runs. -void CodeGenModule::AddGlobalCtor(llvm::Function * Ctor, int Priority) { +void CodeGenModule::AddGlobalCtor(llvm::Function *Ctor, int Priority, + llvm::Constant *AssociatedData) { // FIXME: Type coercion of void()* types. - GlobalCtors.push_back(std::make_pair(Ctor, Priority)); + GlobalCtors.push_back(Structor(Priority, Ctor, AssociatedData)); } /// AddGlobalDtor - Add a function to the list that will be called /// when the module is unloaded. -void CodeGenModule::AddGlobalDtor(llvm::Function * Dtor, int Priority) { +void CodeGenModule::AddGlobalDtor(llvm::Function *Dtor, int Priority) { // FIXME: Type coercion of void()* types. - GlobalDtors.push_back(std::make_pair(Dtor, Priority)); + GlobalDtors.push_back(Structor(Priority, Dtor, nullptr)); } void CodeGenModule::EmitCtorList(const CtorList &Fns, const char *GlobalName) { @@ -554,16 +599,19 @@ void CodeGenModule::EmitCtorList(const CtorList &Fns, const char *GlobalName) { llvm::FunctionType* CtorFTy = llvm::FunctionType::get(VoidTy, false); llvm::Type *CtorPFTy = llvm::PointerType::getUnqual(CtorFTy); - // Get the type of a ctor entry, { i32, void ()* }. - llvm::StructType *CtorStructTy = - llvm::StructType::get(Int32Ty, llvm::PointerType::getUnqual(CtorFTy), NULL); + // Get the type of a ctor entry, { i32, void ()*, i8* }. + llvm::StructType *CtorStructTy = llvm::StructType::get( + Int32Ty, llvm::PointerType::getUnqual(CtorFTy), VoidPtrTy, NULL); // Construct the constructor and destructor arrays. SmallVector<llvm::Constant*, 8> Ctors; for (CtorList::const_iterator I = Fns.begin(), E = Fns.end(); I != E; ++I) { llvm::Constant *S[] = { - llvm::ConstantInt::get(Int32Ty, I->second, false), - llvm::ConstantExpr::getBitCast(I->first, CtorPFTy) + llvm::ConstantInt::get(Int32Ty, I->Priority, false), + llvm::ConstantExpr::getBitCast(I->Initializer, CtorPFTy), + (I->AssociatedData + ? 
llvm::ConstantExpr::getBitCast(I->AssociatedData, VoidPtrTy) + : llvm::Constant::getNullValue(VoidPtrTy)) }; Ctors.push_back(llvm::ConstantStruct::get(CtorStructTy, S)); } @@ -579,67 +627,25 @@ void CodeGenModule::EmitCtorList(const CtorList &Fns, const char *GlobalName) { llvm::GlobalValue::LinkageTypes CodeGenModule::getFunctionLinkage(GlobalDecl GD) { - const FunctionDecl *D = cast<FunctionDecl>(GD.getDecl()); - - if (isa<CXXDestructorDecl>(D) && - getCXXABI().useThunkForDtorVariant(cast<CXXDestructorDecl>(D), - GD.getDtorType())) - return llvm::Function::LinkOnceODRLinkage; + const auto *D = cast<FunctionDecl>(GD.getDecl()); GVALinkage Linkage = getContext().GetGVALinkageForFunction(D); - if (Linkage == GVA_Internal) - return llvm::Function::InternalLinkage; - - if (D->hasAttr<DLLExportAttr>()) - return llvm::Function::DLLExportLinkage; - - if (D->hasAttr<WeakAttr>()) - return llvm::Function::WeakAnyLinkage; - - // In C99 mode, 'inline' functions are guaranteed to have a strong - // definition somewhere else, so we can use available_externally linkage. - if (Linkage == GVA_C99Inline) - return llvm::Function::AvailableExternallyLinkage; - - // Note that Apple's kernel linker doesn't support symbol - // coalescing, so we need to avoid linkonce and weak linkages there. - // Normally, this means we just map to internal, but for explicit - // instantiations we'll map to external. + if (isa<CXXDestructorDecl>(D) && + getCXXABI().useThunkForDtorVariant(cast<CXXDestructorDecl>(D), + GD.getDtorType())) { + // Destructor variants in the Microsoft C++ ABI are always internal or + // linkonce_odr thunks emitted on an as-needed basis. + return Linkage == GVA_Internal ? llvm::GlobalValue::InternalLinkage + : llvm::GlobalValue::LinkOnceODRLinkage; + } - // In C++, the compiler has to emit a definition in every translation unit - // that references the function. We should use linkonce_odr because - // a) if all references in this translation unit are optimized away, we - // don't need to codegen it. b) if the function persists, it needs to be - // merged with other definitions. c) C++ has the ODR, so we know the - // definition is dependable. - if (Linkage == GVA_CXXInline || Linkage == GVA_TemplateInstantiation) - return !Context.getLangOpts().AppleKext - ? llvm::Function::LinkOnceODRLinkage - : llvm::Function::InternalLinkage; - - // An explicit instantiation of a template has weak linkage, since - // explicit instantiations can occur in multiple translation units - // and must all be equivalent. However, we are not allowed to - // throw away these explicit instantiations. - if (Linkage == GVA_ExplicitTemplateInstantiation) - return !Context.getLangOpts().AppleKext - ? llvm::Function::WeakODRLinkage - : llvm::Function::ExternalLinkage; - - // Otherwise, we have strong external linkage. - assert(Linkage == GVA_StrongExternal); - return llvm::Function::ExternalLinkage; + return getLLVMLinkageForDeclarator(D, Linkage, /*isConstantVariable=*/false); } - -/// SetFunctionDefinitionAttributes - Set attributes for a global. -/// -/// FIXME: This is currently only done for aliases and functions, but not for -/// variables (these details are set in EmitGlobalVarDefinition for variables). 
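// Illustrative example (not part of this change): a source-level producer of
// a global_ctors entry with an explicit priority; the new third slot
// (associated data) stays null for this case.
__attribute__((constructor(201))) static void init_logging(void) {
  // Runs before main(); per the GCC documentation, smaller priority numbers
  // run earlier (values up to 100 are reserved).
}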
-void CodeGenModule::SetFunctionDefinitionAttributes(const FunctionDecl *D, - llvm::GlobalValue *GV) { - SetCommonAttributes(D, GV); +void CodeGenModule::setFunctionDefinitionAttributes(const FunctionDecl *D, + llvm::Function *F) { + setNonAliasAttributes(D, F); } void CodeGenModule::SetLLVMFunctionAttributes(const Decl *D, @@ -686,10 +692,15 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, // Naked implies noinline: we should not be inlining such functions. B.addAttribute(llvm::Attribute::Naked); B.addAttribute(llvm::Attribute::NoInline); + } else if (D->hasAttr<OptimizeNoneAttr>()) { + // OptimizeNone implies noinline; we should not be inlining such functions. + B.addAttribute(llvm::Attribute::OptimizeNone); + B.addAttribute(llvm::Attribute::NoInline); + } else if (D->hasAttr<NoDuplicateAttr>()) { + B.addAttribute(llvm::Attribute::NoDuplicate); } else if (D->hasAttr<NoInlineAttr>()) { B.addAttribute(llvm::Attribute::NoInline); - } else if ((D->hasAttr<AlwaysInlineAttr>() || - D->hasAttr<ForceInlineAttr>()) && + } else if (D->hasAttr<AlwaysInlineAttr>() && !F->getAttributes().hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::NoInline)) { // (noinline wins over always_inline, and we can't specify both in IR) @@ -704,23 +715,30 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, if (D->hasAttr<MinSizeAttr>()) B.addAttribute(llvm::Attribute::MinSize); + if (D->hasAttr<OptimizeNoneAttr>()) { + // OptimizeNone wins over OptimizeForSize and MinSize. + B.removeAttribute(llvm::Attribute::OptimizeForSize); + B.removeAttribute(llvm::Attribute::MinSize); + } + if (LangOpts.getStackProtector() == LangOptions::SSPOn) B.addAttribute(llvm::Attribute::StackProtect); + else if (LangOpts.getStackProtector() == LangOptions::SSPStrong) + B.addAttribute(llvm::Attribute::StackProtectStrong); else if (LangOpts.getStackProtector() == LangOptions::SSPReq) B.addAttribute(llvm::Attribute::StackProtectReq); // Add sanitizer attributes if function is not blacklisted. - if (!SanitizerBlacklist->isIn(*F)) { + if (!SanitizerBL.isIn(*F)) { // When AddressSanitizer is enabled, set SanitizeAddress attribute // unless __attribute__((no_sanitize_address)) is used. 
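// Illustrative example (not part of this change): attributes handled by the
// hunk above. optnone implies noinline and strips the size-optimization
// attributes; no_sanitize_address suppresses the per-function
// SanitizeAddress attribute.
__attribute__((optnone)) int keep_debuggable(int x) { return x * 3; }
__attribute__((no_sanitize_address)) void poke(char *p) { *p = 0; }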
- if (SanOpts.Address && !D->hasAttr<NoSanitizeAddressAttr>()) + if (LangOpts.Sanitize.Address && !D->hasAttr<NoSanitizeAddressAttr>()) B.addAttribute(llvm::Attribute::SanitizeAddress); // Same for ThreadSanitizer and __attribute__((no_sanitize_thread)) - if (SanOpts.Thread && !D->hasAttr<NoSanitizeThreadAttr>()) { + if (LangOpts.Sanitize.Thread && !D->hasAttr<NoSanitizeThreadAttr>()) B.addAttribute(llvm::Attribute::SanitizeThread); - } // Same for MemorySanitizer and __attribute__((no_sanitize_memory)) - if (SanOpts.Memory && !D->hasAttr<NoSanitizeMemoryAttr>()) + if (LangOpts.Sanitize.Memory && !D->hasAttr<NoSanitizeMemoryAttr>()) B.addAttribute(llvm::Attribute::SanitizeMemory); } @@ -730,7 +748,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, if (isa<CXXConstructorDecl>(D) || isa<CXXDestructorDecl>(D)) F->setUnnamedAddr(true); - else if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(D)) + else if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) if (MD->isVirtual()) F->setUnnamedAddr(true); @@ -745,20 +763,23 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, void CodeGenModule::SetCommonAttributes(const Decl *D, llvm::GlobalValue *GV) { - if (const NamedDecl *ND = dyn_cast<NamedDecl>(D)) + if (const auto *ND = dyn_cast<NamedDecl>(D)) setGlobalVisibility(GV, ND); else GV->setVisibility(llvm::GlobalValue::DefaultVisibility); if (D->hasAttr<UsedAttr>()) - AddUsedGlobal(GV); + addUsedGlobal(GV); +} + +void CodeGenModule::setNonAliasAttributes(const Decl *D, + llvm::GlobalObject *GO) { + SetCommonAttributes(D, GO); if (const SectionAttr *SA = D->getAttr<SectionAttr>()) - GV->setSection(SA->getName()); + GO->setSection(SA->getName()); - // Alias cannot have attributes. Filter them here. - if (!isa<llvm::GlobalAlias>(GV)) - getTargetCodeGenInfo().SetTargetAttributes(D, GV, *this); + getTargetCodeGenInfo().SetTargetAttributes(D, GO, *this); } void CodeGenModule::SetInternalFunctionAttributes(const Decl *D, @@ -769,7 +790,32 @@ void CodeGenModule::SetInternalFunctionAttributes(const Decl *D, F->setLinkage(llvm::Function::InternalLinkage); - SetCommonAttributes(D, F); + setNonAliasAttributes(D, F); +} + +static void setLinkageAndVisibilityForGV(llvm::GlobalValue *GV, + const NamedDecl *ND) { + // Set linkage and visibility in case we never see a definition. + LinkageInfo LV = ND->getLinkageAndVisibility(); + if (LV.getLinkage() != ExternalLinkage) { + // Don't set internal linkage on declarations. + } else { + if (ND->hasAttr<DLLImportAttr>()) { + GV->setLinkage(llvm::GlobalValue::ExternalLinkage); + GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + } else if (ND->hasAttr<DLLExportAttr>()) { + GV->setLinkage(llvm::GlobalValue::ExternalLinkage); + GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + } else if (ND->hasAttr<WeakAttr>() || ND->isWeakImported()) { + // "extern_weak" is overloaded in LLVM; we probably should have + // separate linkage types for this. + GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); + } + + // Set visibility on a declaration only if it's explicit. 
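// Illustrative example (not part of this change; Windows targets or
// -fms-extensions): declarations that receive the DLL storage classes set by
// setLinkageAndVisibilityForGV above.
__declspec(dllexport) void exported_api(void);
__declspec(dllimport) void imported_api(void);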
+ if (LV.isVisibilityExplicit()) + GV->setVisibility(CodeGenModule::GetLLVMVisibility(LV.getVisibility())); + } } void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, @@ -783,12 +829,17 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, return; } - const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); + const auto *FD = cast<FunctionDecl>(GD.getDecl()); if (!IsIncompleteFunction) SetLLVMFunctionAttributes(FD, getTypes().arrangeGlobalDeclaration(GD), F); - if (getCXXABI().HasThisReturn(GD)) { + // Add the Returned attribute for "this", except for iOS 5 and earlier + // where substantial code, including the libstdc++ dylib, was compiled with + // GCC and does not actually return "this". + if (getCXXABI().HasThisReturn(GD) && + !(getTarget().getTriple().isiOS() && + getTarget().getTriple().isOSVersionLT(6))) { assert(!F->arg_empty() && F->arg_begin()->getType() ->canLosslesslyBitCastTo(F->getReturnType()) && @@ -799,19 +850,12 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, // Only a few attributes are set on declarations; these may later be // overridden by a definition. - if (FD->hasAttr<DLLImportAttr>()) { - F->setLinkage(llvm::Function::DLLImportLinkage); - } else if (FD->hasAttr<WeakAttr>() || - FD->isWeakImported()) { - // "extern_weak" is overloaded in LLVM; we probably should have - // separate linkage types for this. - F->setLinkage(llvm::Function::ExternalWeakLinkage); - } else { - F->setLinkage(llvm::Function::ExternalLinkage); + setLinkageAndVisibilityForGV(F, FD); - LinkageInfo LV = FD->getLinkageAndVisibility(); - if (LV.getLinkage() == ExternalLinkage && LV.isVisibilityExplicit()) { - F->setVisibility(GetLLVMVisibility(LV.getVisibility())); + if (const auto *Dtor = dyn_cast_or_null<CXXDestructorDecl>(FD)) { + if (getCXXABI().useThunkForDtorVariant(Dtor, GD.getDtorType())) { + // Don't dllexport/import destructor thunks. + F->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); } } @@ -825,39 +869,49 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Attribute::NoBuiltin); } -void CodeGenModule::AddUsedGlobal(llvm::GlobalValue *GV) { +void CodeGenModule::addUsedGlobal(llvm::GlobalValue *GV) { assert(!GV->isDeclaration() && "Only globals with definition can force usage."); LLVMUsed.push_back(GV); } -void CodeGenModule::EmitLLVMUsed() { +void CodeGenModule::addCompilerUsedGlobal(llvm::GlobalValue *GV) { + assert(!GV->isDeclaration() && + "Only globals with definition can force usage."); + LLVMCompilerUsed.push_back(GV); +} + +static void emitUsed(CodeGenModule &CGM, StringRef Name, + std::vector<llvm::WeakVH> &List) { // Don't create llvm.used if there is no need. - if (LLVMUsed.empty()) + if (List.empty()) return; - // Convert LLVMUsed to what ConstantArray needs. + // Convert List to what ConstantArray needs. 
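// Illustrative example (not part of this change): a definition kept alive
// only by the 'used' attribute; it ends up in the llvm.used array written by
// emitUsed() above.
__attribute__((used)) static const char build_tag[] = "demo-build";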
SmallVector<llvm::Constant*, 8> UsedArray; - UsedArray.resize(LLVMUsed.size()); - for (unsigned i = 0, e = LLVMUsed.size(); i != e; ++i) { + UsedArray.resize(List.size()); + for (unsigned i = 0, e = List.size(); i != e; ++i) { UsedArray[i] = - llvm::ConstantExpr::getBitCast(cast<llvm::Constant>(&*LLVMUsed[i]), - Int8PtrTy); + llvm::ConstantExpr::getBitCast(cast<llvm::Constant>(&*List[i]), + CGM.Int8PtrTy); } if (UsedArray.empty()) return; - llvm::ArrayType *ATy = llvm::ArrayType::get(Int8PtrTy, UsedArray.size()); + llvm::ArrayType *ATy = llvm::ArrayType::get(CGM.Int8PtrTy, UsedArray.size()); - llvm::GlobalVariable *GV = - new llvm::GlobalVariable(getModule(), ATy, false, - llvm::GlobalValue::AppendingLinkage, - llvm::ConstantArray::get(ATy, UsedArray), - "llvm.used"); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), ATy, false, llvm::GlobalValue::AppendingLinkage, + llvm::ConstantArray::get(ATy, UsedArray), Name); GV->setSection("llvm.metadata"); } +void CodeGenModule::emitLLVMUsed() { + emitUsed(*this, "llvm.used", LLVMUsed); + emitUsed(*this, "llvm.compiler.used", LLVMCompilerUsed); +} + void CodeGenModule::AppendLinkerOptions(StringRef Opts) { llvm::Value *MDOpts = llvm::MDString::get(getLLVMContext(), Opts); LinkerOptionsMetadata.push_back(llvm::MDNode::get(getLLVMContext(), MDOpts)); @@ -1002,31 +1056,23 @@ void CodeGenModule::EmitDeferred() { // Stop if we're out of both deferred v-tables and deferred declarations. if (DeferredDeclsToEmit.empty()) break; - GlobalDecl D = DeferredDeclsToEmit.back(); + DeferredGlobal &G = DeferredDeclsToEmit.back(); + GlobalDecl D = G.GD; + llvm::GlobalValue *GV = G.GV; DeferredDeclsToEmit.pop_back(); + assert(GV == GetGlobalValue(getMangledName(D))); // Check to see if we've already emitted this. This is necessary // for a couple of reasons: first, decls can end up in the // deferred-decls queue multiple times, and second, decls can end // up with definitions in unusual ways (e.g. by an extern inline // function acquiring a strong function redefinition). Just // ignore these cases. - // - // TODO: That said, looking this up multiple times is very wasteful. - StringRef Name = getMangledName(D); - llvm::GlobalValue *CGRef = GetGlobalValue(Name); - assert(CGRef && "Deferred decl wasn't referenced?"); - - if (!CGRef->isDeclaration()) - continue; - - // GlobalAlias::isDeclaration() defers to the aliasee, but for our - // purposes an alias counts as a definition. - if (isa<llvm::GlobalAlias>(CGRef)) + if(!GV->isDeclaration()) continue; // Otherwise, emit the definition and move on to the next one. - EmitGlobalDefinition(D); + EmitGlobalDefinition(D, GV); } } @@ -1037,9 +1083,9 @@ void CodeGenModule::EmitGlobalAnnotations() { // Create a new global variable for the ConstantStruct in the Module. llvm::Constant *Array = llvm::ConstantArray::get(llvm::ArrayType::get( Annotations[0]->getType(), Annotations.size()), Annotations); - llvm::GlobalValue *gv = new llvm::GlobalVariable(getModule(), - Array->getType(), false, llvm::GlobalValue::AppendingLinkage, Array, - "llvm.global.annotations"); + auto *gv = new llvm::GlobalVariable(getModule(), Array->getType(), false, + llvm::GlobalValue::AppendingLinkage, + Array, "llvm.global.annotations"); gv->setSection(AnnotationSection); } @@ -1050,8 +1096,9 @@ llvm::Constant *CodeGenModule::EmitAnnotationString(StringRef Str) { // Not found yet, create a new global. 
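// Illustrative example (not part of this change): source form of an
// annotation collected into the llvm.global.annotations variable emitted
// above.
__attribute__((annotate("crypto_sensitive"))) static int key_material[8];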
llvm::Constant *s = llvm::ConstantDataArray::getString(getLLVMContext(), Str); - llvm::GlobalValue *gv = new llvm::GlobalVariable(getModule(), s->getType(), - true, llvm::GlobalValue::PrivateLinkage, s, ".str"); + auto *gv = + new llvm::GlobalVariable(getModule(), s->getType(), true, + llvm::GlobalValue::PrivateLinkage, s, ".str"); gv->setSection(AnnotationSection); gv->setUnnamedAddr(true); AStr = gv; @@ -1096,10 +1143,8 @@ void CodeGenModule::AddGlobalAnnotations(const ValueDecl *D, llvm::GlobalValue *GV) { assert(D->hasAttr<AnnotateAttr>() && "no annotate attribute"); // Get the struct elements for these annotations. - for (specific_attr_iterator<AnnotateAttr> - ai = D->specific_attr_begin<AnnotateAttr>(), - ae = D->specific_attr_end<AnnotateAttr>(); ai != ae; ++ai) - Annotations.push_back(EmitAnnotateAttr(GV, *ai, D->getLocation())); + for (const auto *I : D->specific_attrs<AnnotateAttr>()) + Annotations.push_back(EmitAnnotateAttr(GV, I, D->getLocation())); } bool CodeGenModule::MayDeferGeneration(const ValueDecl *Global) { @@ -1125,7 +1170,7 @@ llvm::Constant *CodeGenModule::GetAddrOfUuidDescriptor( llvm::Constant *Init = EmitUuidofInitializer(Uuid, E->getType()); assert(Init && "failed to initialize as constant"); - llvm::GlobalVariable *GV = new llvm::GlobalVariable( + auto *GV = new llvm::GlobalVariable( getModule(), Init->getType(), /*isConstant=*/true, llvm::GlobalValue::LinkOnceODRLinkage, Init, Name); return GV; @@ -1151,9 +1196,10 @@ llvm::Constant *CodeGenModule::GetWeakRefReference(const ValueDecl *VD) { /*ForVTable=*/false); else Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), - llvm::PointerType::getUnqual(DeclTy), 0); + llvm::PointerType::getUnqual(DeclTy), + nullptr); - llvm::GlobalValue* F = cast<llvm::GlobalValue>(Aliasee); + auto *F = cast<llvm::GlobalValue>(Aliasee); F->setLinkage(llvm::Function::ExternalWeakLinkage); WeakRefReferences.insert(F); @@ -1161,7 +1207,7 @@ llvm::Constant *CodeGenModule::GetWeakRefReference(const ValueDecl *VD) { } void CodeGenModule::EmitGlobal(GlobalDecl GD) { - const ValueDecl *Global = cast<ValueDecl>(GD.getDecl()); + const auto *Global = cast<ValueDecl>(GD.getDecl()); // Weak references don't produce any output by themselves. if (Global->hasAttr<WeakRefAttr>()) @@ -1190,25 +1236,28 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { } // Ignore declarations, they will be emitted on their first use. - if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(Global)) { + if (const auto *FD = dyn_cast<FunctionDecl>(Global)) { // Forward declarations are emitted lazily on first use. if (!FD->doesThisDeclarationHaveABody()) { if (!FD->doesDeclarationForceExternallyVisibleDefinition()) return; - const FunctionDecl *InlineDefinition = 0; - FD->getBody(InlineDefinition); - StringRef MangledName = getMangledName(GD); - DeferredDecls.erase(MangledName); - EmitGlobalDefinition(InlineDefinition); + + // Compute the function info and LLVM type. 
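// Illustrative example (not part of this change): the weakref form resolved
// by GetWeakRefReference above; the reference binds to real_impl if some
// other TU defines it and is null otherwise.
extern "C" void real_impl(void);
static void maybe_impl(void) __attribute__((weakref("real_impl")));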
+ const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); + llvm::Type *Ty = getTypes().GetFunctionType(FI); + + GetOrCreateLLVMFunction(MangledName, Ty, GD, /*ForVTable=*/false, + /*DontDefer=*/false); return; } } else { - const VarDecl *VD = cast<VarDecl>(Global); + const auto *VD = cast<VarDecl>(Global); assert(VD->isFileVarDecl() && "Cannot emit local var decl as global."); - if (VD->isThisDeclarationADefinition() != VarDecl::Definition) + if (VD->isThisDeclarationADefinition() != VarDecl::Definition && + !Context.isMSStaticDataMemberInlineDefinition(VD)) return; } @@ -1225,14 +1274,14 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { if (getLangOpts().CPlusPlus && isa<VarDecl>(Global) && cast<VarDecl>(Global)->hasInit()) { DelayedCXXInitPosition[Global] = CXXGlobalInits.size(); - CXXGlobalInits.push_back(0); + CXXGlobalInits.push_back(nullptr); } // If the value has already been used, add it directly to the // DeferredDeclsToEmit list. StringRef MangledName = getMangledName(GD); - if (GetGlobalValue(MangledName)) - DeferredDeclsToEmit.push_back(GD); + if (llvm::GlobalValue *GV = GetGlobalValue(MangledName)) + addDeferredDeclToEmit(GV, GD); else { // Otherwise, remember that we saw a deferred decl with this name. The // first use of the mangled name will cause it to move into @@ -1300,9 +1349,8 @@ bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) { if (getFunctionLinkage(GD) != llvm::Function::AvailableExternallyLinkage) return true; - const FunctionDecl *F = cast<FunctionDecl>(GD.getDecl()); - if (CodeGenOpts.OptimizationLevel == 0 && - !F->hasAttr<AlwaysInlineAttr>() && !F->hasAttr<ForceInlineAttr>()) + const auto *F = cast<FunctionDecl>(GD.getDecl()); + if (CodeGenOpts.OptimizationLevel == 0 && !F->hasAttr<AlwaysInlineAttr>()) return false; // PR9614. Avoid cases where the source code is lying to us. An available // externally function should have an equivalent function somewhere else, @@ -1323,14 +1371,13 @@ void CodeGenModule::CompleteDIClassType(const CXXMethodDecl* D) { if (CGDebugInfo *DI = getModuleDebugInfo()) if (getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo) { - const PointerType *ThisPtr = - cast<PointerType>(D->getThisType(getContext())); + const auto *ThisPtr = cast<PointerType>(D->getThisType(getContext())); DI->getOrCreateRecordType(ThisPtr->getPointeeType(), D->getLocation()); } } -void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD) { - const ValueDecl *D = cast<ValueDecl>(GD.getDecl()); +void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { + const auto *D = cast<ValueDecl>(GD.getDecl()); PrettyStackTraceDecl CrashInfo(const_cast<ValueDecl *>(D), D->getLocation(), Context.getSourceManager(), @@ -1342,16 +1389,16 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD) { if (!shouldEmitFunction(GD)) return; - if (const CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) { + if (const auto *Method = dyn_cast<CXXMethodDecl>(D)) { CompleteDIClassType(Method); // Make sure to emit the definition(s) before we emit the thunks. // This is necessary for the generation of certain thunks. 
- if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(Method)) + if (const auto *CD = dyn_cast<CXXConstructorDecl>(Method)) EmitCXXConstructor(CD, GD.getCtorType()); - else if (const CXXDestructorDecl *DD =dyn_cast<CXXDestructorDecl>(Method)) + else if (const auto *DD = dyn_cast<CXXDestructorDecl>(Method)) EmitCXXDestructor(DD, GD.getDtorType()); else - EmitGlobalFunctionDefinition(GD); + EmitGlobalFunctionDefinition(GD, GV); if (Method->isVirtual()) getVTables().EmitThunks(GD); @@ -1359,10 +1406,10 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD) { return; } - return EmitGlobalFunctionDefinition(GD); + return EmitGlobalFunctionDefinition(GD, GV); } - - if (const VarDecl *VD = dyn_cast<VarDecl>(D)) + + if (const auto *VD = dyn_cast<VarDecl>(D)) return EmitGlobalVarDefinition(VD); llvm_unreachable("Invalid argument to EmitGlobalDefinition()"); @@ -1379,6 +1426,7 @@ llvm::Constant * CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, llvm::Type *Ty, GlobalDecl GD, bool ForVTable, + bool DontDefer, llvm::AttributeSet ExtraAttrs) { const Decl *D = GD.getDecl(); @@ -1398,14 +1446,6 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, return llvm::ConstantExpr::getBitCast(Entry, Ty->getPointerTo()); } - // All MSVC dtors other than the base dtor are linkonce_odr and delegate to - // each other bottoming out with the base dtor. Therefore we emit non-base - // dtors on usage, even if there is no dtor definition in the TU. - if (D && isa<CXXDestructorDecl>(D) && - getCXXABI().useThunkForDtorVariant(cast<CXXDestructorDecl>(D), - GD.getDtorType())) - DeferredDeclsToEmit.push_back(GD); - // This function doesn't have a complete type (for example, the return // type is an incomplete struct). Use a fake type instead, and make // sure not to try to set attributes. @@ -1433,50 +1473,64 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, B)); } - // This is the first use or definition of a mangled name. If there is a - // deferred decl with this name, remember that we need to emit it at the end - // of the file. - llvm::StringMap<GlobalDecl>::iterator DDI = DeferredDecls.find(MangledName); - if (DDI != DeferredDecls.end()) { - // Move the potentially referenced deferred decl to the DeferredDeclsToEmit - // list, and remove it from DeferredDecls (since we don't need it anymore). - DeferredDeclsToEmit.push_back(DDI->second); - DeferredDecls.erase(DDI); - - // Otherwise, if this is a sized deallocation function, emit a weak definition - // for it at the end of the translation unit. - } else if (D && cast<FunctionDecl>(D) - ->getCorrespondingUnsizedGlobalDeallocationFunction()) { - DeferredDeclsToEmit.push_back(GD); - - // Otherwise, there are cases we have to worry about where we're - // using a declaration for which we must emit a definition but where - // we might not find a top-level definition: - // - member functions defined inline in their classes - // - friend functions defined inline in some class - // - special member functions with implicit definitions - // If we ever change our AST traversal to walk into class methods, - // this will be unnecessary. - // - // We also don't emit a definition for a function if it's going to be an entry - // in a vtable, unless it's already marked as used. - } else if (getLangOpts().CPlusPlus && D) { - // Look for a declaration that's lexically in a record. 
- const FunctionDecl *FD = cast<FunctionDecl>(D); - FD = FD->getMostRecentDecl(); - do { - if (isa<CXXRecordDecl>(FD->getLexicalDeclContext())) { - if (FD->isImplicit() && !ForVTable) { - assert(FD->isUsed() && "Sema didn't mark implicit function as used!"); - DeferredDeclsToEmit.push_back(GD.getWithDecl(FD)); - break; - } else if (FD->doesThisDeclarationHaveABody()) { - DeferredDeclsToEmit.push_back(GD.getWithDecl(FD)); - break; + if (!DontDefer) { + // All MSVC dtors other than the base dtor are linkonce_odr and delegate to + // each other bottoming out with the base dtor. Therefore we emit non-base + // dtors on usage, even if there is no dtor definition in the TU. + if (D && isa<CXXDestructorDecl>(D) && + getCXXABI().useThunkForDtorVariant(cast<CXXDestructorDecl>(D), + GD.getDtorType())) + addDeferredDeclToEmit(F, GD); + + // This is the first use or definition of a mangled name. If there is a + // deferred decl with this name, remember that we need to emit it at the end + // of the file. + auto DDI = DeferredDecls.find(MangledName); + if (DDI != DeferredDecls.end()) { + // Move the potentially referenced deferred decl to the + // DeferredDeclsToEmit list, and remove it from DeferredDecls (since we + // don't need it anymore). + addDeferredDeclToEmit(F, DDI->second); + DeferredDecls.erase(DDI); + + // Otherwise, if this is a sized deallocation function, emit a weak + // definition + // for it at the end of the translation unit. + } else if (D && cast<FunctionDecl>(D) + ->getCorrespondingUnsizedGlobalDeallocationFunction()) { + addDeferredDeclToEmit(F, GD); + + // Otherwise, there are cases we have to worry about where we're + // using a declaration for which we must emit a definition but where + // we might not find a top-level definition: + // - member functions defined inline in their classes + // - friend functions defined inline in some class + // - special member functions with implicit definitions + // If we ever change our AST traversal to walk into class methods, + // this will be unnecessary. + // + // We also don't emit a definition for a function if it's going to be an + // entry + // in a vtable, unless it's already marked as used. + } else if (getLangOpts().CPlusPlus && D) { + // Look for a declaration that's lexically in a record. + const auto *FD = cast<FunctionDecl>(D); + FD = FD->getMostRecentDecl(); + do { + if (isa<CXXRecordDecl>(FD->getLexicalDeclContext())) { + if (FD->isImplicit() && !ForVTable) { + assert(FD->isUsed() && + "Sema didn't mark implicit function as used!"); + addDeferredDeclToEmit(F, GD.getWithDecl(FD)); + break; + } else if (FD->doesThisDeclarationHaveABody()) { + addDeferredDeclToEmit(F, GD.getWithDecl(FD)); + break; + } } - } - FD = FD->getPreviousDecl(); - } while (FD); + FD = FD->getPreviousDecl(); + } while (FD); + } } // Make sure the result is of the requested type. @@ -1494,13 +1548,14 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, /// create it (this occurs when we see a definition of the function). llvm::Constant *CodeGenModule::GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty, - bool ForVTable) { + bool ForVTable, + bool DontDefer) { // If there was no specific requested type, just convert it now. 
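// Illustrative example, assuming the C++14 sized-deallocation support this
// hook appears to serve (not confirmed by this hunk): declaring the sized
// form is enough for CodeGen to schedule the deferred weak definition that
// forwards to the unsized operator delete.
#include <cstddef>
void operator delete(void *ptr, std::size_t size) noexcept;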
if (!Ty) Ty = getTypes().ConvertType(cast<ValueDecl>(GD.getDecl())->getType()); StringRef MangledName = getMangledName(GD); - return GetOrCreateLLVMFunction(MangledName, Ty, GD, ForVTable); + return GetOrCreateLLVMFunction(MangledName, Ty, GD, ForVTable, DontDefer); } /// CreateRuntimeFunction - Create a new runtime function with the specified @@ -1509,10 +1564,10 @@ llvm::Constant * CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, llvm::AttributeSet ExtraAttrs) { - llvm::Constant *C - = GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false, - ExtraAttrs); - if (llvm::Function *F = dyn_cast<llvm::Function>(C)) + llvm::Constant *C = + GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false, + /*DontDefer=*/false, ExtraAttrs); + if (auto *F = dyn_cast<llvm::Function>(C)) if (F->empty()) F->setCallingConv(getRuntimeCC()); return C; @@ -1548,8 +1603,7 @@ bool CodeGenModule::isTypeConstant(QualType Ty, bool ExcludeCtor) { llvm::Constant * CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::PointerType *Ty, - const VarDecl *D, - bool UnnamedAddr) { + const VarDecl *D) { // Lookup the entry, lazily creating it if necessary. llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (Entry) { @@ -1558,9 +1612,6 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, Entry->setLinkage(llvm::Function::ExternalLinkage); } - if (UnnamedAddr) - Entry->setUnnamedAddr(true); - if (Entry->getType() == Ty) return Entry; @@ -1571,44 +1622,30 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, return llvm::ConstantExpr::getBitCast(Entry, Ty); } + unsigned AddrSpace = GetGlobalVarAddressSpace(D, Ty->getAddressSpace()); + auto *GV = new llvm::GlobalVariable( + getModule(), Ty->getElementType(), false, + llvm::GlobalValue::ExternalLinkage, nullptr, MangledName, nullptr, + llvm::GlobalVariable::NotThreadLocal, AddrSpace); + // This is the first use or definition of a mangled name. If there is a // deferred decl with this name, remember that we need to emit it at the end // of the file. - llvm::StringMap<GlobalDecl>::iterator DDI = DeferredDecls.find(MangledName); + auto DDI = DeferredDecls.find(MangledName); if (DDI != DeferredDecls.end()) { // Move the potentially referenced deferred decl to the DeferredDeclsToEmit // list, and remove it from DeferredDecls (since we don't need it anymore). - DeferredDeclsToEmit.push_back(DDI->second); + addDeferredDeclToEmit(GV, DDI->second); DeferredDecls.erase(DDI); } - unsigned AddrSpace = GetGlobalVarAddressSpace(D, Ty->getAddressSpace()); - llvm::GlobalVariable *GV = - new llvm::GlobalVariable(getModule(), Ty->getElementType(), false, - llvm::GlobalValue::ExternalLinkage, - 0, MangledName, 0, - llvm::GlobalVariable::NotThreadLocal, AddrSpace); - // Handle things which are present even on external declarations. if (D) { // FIXME: This code is overly simple and should be merged with other global // handling. GV->setConstant(isTypeConstant(D->getType(), false)); - // Set linkage and visibility in case we never see a definition. - LinkageInfo LV = D->getLinkageAndVisibility(); - if (LV.getLinkage() != ExternalLinkage) { - // Don't set internal linkage on declarations. - } else { - if (D->hasAttr<DLLImportAttr>()) - GV->setLinkage(llvm::GlobalValue::DLLImportLinkage); - else if (D->hasAttr<WeakAttr>() || D->isWeakImported()) - GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); - - // Set visibility on a declaration only if it's explicit. 
- if (LV.isVisibilityExplicit()) - GV->setVisibility(GetLLVMVisibility(LV.getVisibility())); - } + setLinkageAndVisibilityForGV(GV, D); if (D->getTLSKind()) { if (D->getTLSKind() == VarDecl::TLS_Dynamic) @@ -1618,10 +1655,16 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, // If required by the ABI, treat declarations of static data members with // inline initializers as definitions. - if (getCXXABI().isInlineInitializedStaticDataMemberLinkOnce() && - D->isStaticDataMember() && D->hasInit() && - !D->isThisDeclarationADefinition()) + if (getContext().isMSStaticDataMemberInlineDefinition(D)) { EmitGlobalVarDefinition(D); + } + + // Handle XCore specific ABI requirements. + if (getTarget().getTriple().getArch() == llvm::Triple::xcore && + D->getLanguageLinkage() == CLanguageLinkage && + D->getType().isConstant(Context) && + isExternallyVisible(D->getLinkageAndVisibility().getLinkage())) + GV->setSection(".cp.rodata"); } if (AddrSpace != Ty->getAddressSpace()) @@ -1636,9 +1679,8 @@ CodeGenModule::CreateOrReplaceCXXRuntimeVariable(StringRef Name, llvm::Type *Ty, llvm::GlobalValue::LinkageTypes Linkage) { llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name); - llvm::GlobalVariable *OldGV = 0; + llvm::GlobalVariable *OldGV = nullptr; - if (GV) { // Check if the variable has the right type. if (GV->getType()->getElementType() == Ty) @@ -1652,8 +1694,8 @@ CodeGenModule::CreateOrReplaceCXXRuntimeVariable(StringRef Name, // Create a new variable. GV = new llvm::GlobalVariable(getModule(), Ty, /*isConstant=*/true, - Linkage, 0, Name); - + Linkage, nullptr, Name); + if (OldGV) { // Replace occurrences of the old variable if needed. GV->takeName(OldGV); @@ -1678,7 +1720,7 @@ llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D, llvm::Type *Ty) { assert(D->hasGlobalStorage() && "Not a global variable"); QualType ASTTy = D->getType(); - if (Ty == 0) + if (!Ty) Ty = getTypes().ConvertTypeForMem(ASTTy); llvm::PointerType *PTy = @@ -1693,8 +1735,7 @@ llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D, llvm::Constant * CodeGenModule::CreateRuntimeVariable(llvm::Type *Ty, StringRef Name) { - return GetOrCreateLLVMGlobal(Name, llvm::PointerType::getUnqual(Ty), 0, - true); + return GetOrCreateLLVMGlobal(Name, llvm::PointerType::getUnqual(Ty), nullptr); } void CodeGenModule::EmitTentativeDefinition(const VarDecl *D) { @@ -1764,11 +1805,11 @@ void CodeGenModule::MaybeHandleStaticInExternC(const SomeDecl *D, // If we have multiple internal linkage entities with the same name // in extern "C" regions, none of them gets that name. if (!R.second) - R.first->second = 0; + R.first->second = nullptr; } void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { - llvm::Constant *Init = 0; + llvm::Constant *Init = nullptr; QualType ASTTy = D->getType(); CXXRecordDecl *RD = ASTTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); bool NeedsGlobalCtor = false; @@ -1818,7 +1859,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { llvm::Constant *Entry = GetAddrOfGlobalVar(D, InitType); // Strip off a bitcast if we got one back. - if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(Entry)) { + if (auto *CE = dyn_cast<llvm::ConstantExpr>(Entry)) { assert(CE->getOpcode() == llvm::Instruction::BitCast || CE->getOpcode() == llvm::Instruction::AddrSpaceCast || // All zero index gep. @@ -1827,7 +1868,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { } // Entry is now either a Function or GlobalVariable. 
- llvm::GlobalVariable *GV = dyn_cast<llvm::GlobalVariable>(Entry); + auto *GV = dyn_cast<llvm::GlobalVariable>(Entry); // We have a definition after a declaration with the wrong type. // We must make a new GlobalVariable* and update everything that used OldGV @@ -1838,7 +1879,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { // "extern int x[];") and then a definition of a different type (e.g. // "int x[10];"). This also happens when an initializer has a different type // from the type of the global (this happens with unions). - if (GV == 0 || + if (!GV || GV->getType()->getElementType() != InitType || GV->getType()->getAddressSpace() != GetGlobalVarAddressSpace(D, getContext().getTargetAddressSpace(ASTTy))) { @@ -1872,38 +1913,35 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { GV->setAlignment(getContext().getDeclAlign(D).getQuantity()); // Set the llvm linkage type as appropriate. - llvm::GlobalValue::LinkageTypes Linkage = - GetLLVMLinkageVarDefinition(D, GV->isConstant()); - GV->setLinkage(Linkage); + llvm::GlobalValue::LinkageTypes Linkage = + getLLVMLinkageVarDefinition(D, GV->isConstant()); + + // On Darwin, the backing variable for a C++11 thread_local variable always + // has internal linkage; all accesses should just be calls to the + // Itanium-specified entry point, which has the normal linkage of the + // variable. + if (const auto *VD = dyn_cast<VarDecl>(D)) + if (!VD->isStaticLocal() && VD->getTLSKind() == VarDecl::TLS_Dynamic && + Context.getTargetInfo().getTriple().isMacOSX()) + Linkage = llvm::GlobalValue::InternalLinkage; - // If required by the ABI, give definitions of static data members with inline - // initializers linkonce_odr linkage. - if (getCXXABI().isInlineInitializedStaticDataMemberLinkOnce() && - D->isStaticDataMember() && InitExpr && - !InitDecl->isThisDeclarationADefinition()) - GV->setLinkage(llvm::GlobalVariable::LinkOnceODRLinkage); + GV->setLinkage(Linkage); + if (D->hasAttr<DLLImportAttr>()) + GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass); + else if (D->hasAttr<DLLExportAttr>()) + GV->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass); if (Linkage == llvm::GlobalVariable::CommonLinkage) // common vars aren't constant even if declared const. GV->setConstant(false); - SetCommonAttributes(D, GV); + setNonAliasAttributes(D, GV); // Emit the initializer function if necessary. if (NeedsGlobalCtor || NeedsGlobalDtor) EmitCXXGlobalVarDeclInitFunc(D, GV, NeedsGlobalCtor); - // If we are compiling with ASan, add metadata indicating dynamically - // initialized globals. - if (SanOpts.Address && NeedsGlobalCtor) { - llvm::Module &M = getModule(); - - llvm::NamedMDNode *DynamicInitializers = - M.getOrInsertNamedMetadata("llvm.asan.dynamically_initialized_globals"); - llvm::Value *GlobalToAdd[] = { GV }; - llvm::MDNode *ThisGlobal = llvm::MDNode::get(VMContext, GlobalToAdd); - DynamicInitializers->addOperand(ThisGlobal); - } + reportGlobalToASan(GV, *D, NeedsGlobalCtor); // Emit global variable debug information. 
if (CGDebugInfo *DI = getModuleDebugInfo()) @@ -1911,47 +1949,164 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { DI->EmitGlobalVariable(GV, D); } -llvm::GlobalValue::LinkageTypes -CodeGenModule::GetLLVMLinkageVarDefinition(const VarDecl *D, bool isConstant) { - GVALinkage Linkage = getContext().GetGVALinkageForVariable(D); +void CodeGenModule::reportGlobalToASan(llvm::GlobalVariable *GV, + SourceLocation Loc, StringRef Name, + bool IsDynInit, bool IsBlacklisted) { + if (!LangOpts.Sanitize.Address) + return; + IsDynInit &= !SanitizerBL.isIn(*GV, "init"); + IsBlacklisted |= SanitizerBL.isIn(*GV); + + llvm::GlobalVariable *LocDescr = nullptr; + llvm::GlobalVariable *GlobalName = nullptr; + if (!IsBlacklisted) { + // Don't generate source location and global name if it is blacklisted - + // it won't be instrumented anyway. + PresumedLoc PLoc = Context.getSourceManager().getPresumedLoc(Loc); + if (PLoc.isValid()) { + llvm::Constant *LocData[] = { + GetAddrOfConstantCString(PLoc.getFilename()), + llvm::ConstantInt::get(llvm::Type::getInt32Ty(VMContext), + PLoc.getLine()), + llvm::ConstantInt::get(llvm::Type::getInt32Ty(VMContext), + PLoc.getColumn()), + }; + auto LocStruct = llvm::ConstantStruct::getAnon(LocData); + LocDescr = new llvm::GlobalVariable(TheModule, LocStruct->getType(), true, + llvm::GlobalValue::PrivateLinkage, + LocStruct, ".asan_loc_descr"); + LocDescr->setUnnamedAddr(true); + // Add LocDescr to llvm.compiler.used, so that it won't be removed by + // the optimizer before the ASan instrumentation pass. + addCompilerUsedGlobal(LocDescr); + } + if (!Name.empty()) { + GlobalName = GetAddrOfConstantCString(Name); + // GlobalName shouldn't be removed by the optimizer. + addCompilerUsedGlobal(GlobalName); + } + } + + llvm::Value *GlobalMetadata[] = { + GV, LocDescr, GlobalName, + llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), IsDynInit), + llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), IsBlacklisted)}; + + llvm::MDNode *ThisGlobal = llvm::MDNode::get(VMContext, GlobalMetadata); + llvm::NamedMDNode *AsanGlobals = + TheModule.getOrInsertNamedMetadata("llvm.asan.globals"); + AsanGlobals->addOperand(ThisGlobal); +} + +void CodeGenModule::reportGlobalToASan(llvm::GlobalVariable *GV, + const VarDecl &D, bool IsDynInit) { + if (!LangOpts.Sanitize.Address) + return; + std::string QualName; + llvm::raw_string_ostream OS(QualName); + D.printQualifiedName(OS); + reportGlobalToASan(GV, D.getLocation(), OS.str(), IsDynInit); +} + +void CodeGenModule::disableSanitizerForGlobal(llvm::GlobalVariable *GV) { + // For now, just make sure the global is not modified by the ASan + // instrumentation. + if (LangOpts.Sanitize.Address) + reportGlobalToASan(GV, SourceLocation(), "", false, true); +} + +static bool isVarDeclStrongDefinition(const VarDecl *D, bool NoCommon) { + // Don't give variables common linkage if -fno-common was specified unless it + // was overridden by a NoCommon attribute. + if ((NoCommon || D->hasAttr<NoCommonAttr>()) && !D->hasAttr<CommonAttr>()) + return true; + + // C11 6.9.2/2: + // A declaration of an identifier for an object that has file scope without + // an initializer, and without a storage-class specifier or with the + // storage-class specifier static, constitutes a tentative definition. + if (D->getInit() || D->hasExternalStorage()) + return true; + + // A variable cannot be both common and exist in a section. + if (D->hasAttr<SectionAttr>()) + return true; + + // Thread local vars aren't considered common linkage. 
+ if (D->getTLSKind()) + return true; + + // Tentative definitions marked with WeakImportAttr are true definitions. + if (D->hasAttr<WeakImportAttr>()) + return true; + + return false; +} + +llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator( + const DeclaratorDecl *D, GVALinkage Linkage, bool IsConstantVariable) { if (Linkage == GVA_Internal) return llvm::Function::InternalLinkage; - else if (D->hasAttr<DLLImportAttr>()) - return llvm::Function::DLLImportLinkage; - else if (D->hasAttr<DLLExportAttr>()) - return llvm::Function::DLLExportLinkage; - else if (D->hasAttr<SelectAnyAttr>()) { - // selectany symbols are externally visible, so use weak instead of - // linkonce. MSVC optimizes away references to const selectany globals, so - // all definitions should be the same and ODR linkage should be used. - // http://msdn.microsoft.com/en-us/library/5tkz6s71.aspx - return llvm::GlobalVariable::WeakODRLinkage; - } else if (D->hasAttr<WeakAttr>()) { - if (isConstant) + + if (D->hasAttr<WeakAttr>()) { + if (IsConstantVariable) return llvm::GlobalVariable::WeakODRLinkage; else return llvm::GlobalVariable::WeakAnyLinkage; - } else if (Linkage == GVA_TemplateInstantiation || - Linkage == GVA_ExplicitTemplateInstantiation) - return llvm::GlobalVariable::WeakODRLinkage; - else if (!getLangOpts().CPlusPlus && - ((!CodeGenOpts.NoCommon && !D->getAttr<NoCommonAttr>()) || - D->getAttr<CommonAttr>()) && - !D->hasExternalStorage() && !D->getInit() && - !D->getAttr<SectionAttr>() && !D->getTLSKind() && - !D->getAttr<WeakImportAttr>()) { - // Thread local vars aren't considered common linkage. + } + + // We are guaranteed to have a strong definition somewhere else, + // so we can use available_externally linkage. + if (Linkage == GVA_AvailableExternally) + return llvm::Function::AvailableExternallyLinkage; + + // Note that Apple's kernel linker doesn't support symbol + // coalescing, so we need to avoid linkonce and weak linkages there. + // Normally, this means we just map to internal, but for explicit + // instantiations we'll map to external. + + // In C++, the compiler has to emit a definition in every translation unit + // that references the function. We should use linkonce_odr because + // a) if all references in this translation unit are optimized away, we + // don't need to codegen it. b) if the function persists, it needs to be + // merged with other definitions. c) C++ has the ODR, so we know the + // definition is dependable. + if (Linkage == GVA_DiscardableODR) + return !Context.getLangOpts().AppleKext ? llvm::Function::LinkOnceODRLinkage + : llvm::Function::InternalLinkage; + + // An explicit instantiation of a template has weak linkage, since + // explicit instantiations can occur in multiple translation units + // and must all be equivalent. However, we are not allowed to + // throw away these explicit instantiations. + if (Linkage == GVA_StrongODR) + return !Context.getLangOpts().AppleKext ? llvm::Function::WeakODRLinkage + : llvm::Function::ExternalLinkage; + + // C++ doesn't have tentative definitions and thus cannot have common + // linkage. 
+ if (!getLangOpts().CPlusPlus && isa<VarDecl>(D) && + !isVarDeclStrongDefinition(cast<VarDecl>(D), CodeGenOpts.NoCommon)) return llvm::GlobalVariable::CommonLinkage; - } else if (D->getTLSKind() == VarDecl::TLS_Dynamic && - getTarget().getTriple().isMacOSX()) - // On Darwin, the backing variable for a C++11 thread_local variable always - // has internal linkage; all accesses should just be calls to the - // Itanium-specified entry point, which has the normal linkage of the - // variable. - return llvm::GlobalValue::InternalLinkage; + + // selectany symbols are externally visible, so use weak instead of + // linkonce. MSVC optimizes away references to const selectany globals, so + // all definitions should be the same and ODR linkage should be used. + // http://msdn.microsoft.com/en-us/library/5tkz6s71.aspx + if (D->hasAttr<SelectAnyAttr>()) + return llvm::GlobalVariable::WeakODRLinkage; + + // Otherwise, we have strong external linkage. + assert(Linkage == GVA_StrongExternal); return llvm::GlobalVariable::ExternalLinkage; } +llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageVarDefinition( + const VarDecl *VD, bool IsConstant) { + GVALinkage Linkage = getContext().GetGVALinkageForVariable(VD); + return getLLVMLinkageForDeclarator(VD, Linkage, IsConstant); +} + /// Replace the uses of a function that was declared with a non-proto type. /// We want to silently drop extra arguments from call sites static void replaceUsesOfNonProtoConstant(llvm::Constant *old, @@ -1965,11 +2120,11 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, for (llvm::Value::use_iterator ui = old->use_begin(), ue = old->use_end(); ui != ue; ) { llvm::Value::use_iterator use = ui++; // Increment before the use is erased. - llvm::User *user = *use; + llvm::User *user = use->getUser(); // Recognize and replace uses of bitcasts. Most calls to // unprototyped functions will use bitcasts. - if (llvm::ConstantExpr *bitcast = dyn_cast<llvm::ConstantExpr>(user)) { + if (auto *bitcast = dyn_cast<llvm::ConstantExpr>(user)) { if (bitcast->getOpcode() == llvm::Instruction::BitCast) replaceUsesOfNonProtoConstant(bitcast, newFn); continue; @@ -1978,7 +2133,7 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, // Recognize calls to the function. llvm::CallSite callSite(user); if (!callSite) continue; - if (!callSite.isCallee(use)) continue; + if (!callSite.isCallee(&*use)) continue; // If the return types don't match exactly, then we can't // transform this call unless it's dead. @@ -2033,8 +2188,7 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, newCall = llvm::CallInst::Create(newFn, newArgs, "", callSite.getInstruction()); } else { - llvm::InvokeInst *oldInvoke = - cast<llvm::InvokeInst>(callSite.getInstruction()); + auto *oldInvoke = cast<llvm::InvokeInst>(callSite.getInstruction()); newCall = llvm::InvokeInst::Create(newFn, oldInvoke->getNormalDest(), oldInvoke->getUnwindDest(), @@ -2087,33 +2241,36 @@ void CodeGenModule::HandleCXXStaticMemberVarInstantiation(VarDecl *VD) { EmitTopLevelDecl(VD); } -void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD) { - const FunctionDecl *D = cast<FunctionDecl>(GD.getDecl()); +void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, + llvm::GlobalValue *GV) { + const auto *D = cast<FunctionDecl>(GD.getDecl()); // Compute the function info and LLVM type. 
const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); // Get or create the prototype for the function. - llvm::Constant *Entry = GetAddrOfFunction(GD, Ty); - - // Strip off a bitcast if we got one back. - if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(Entry)) { - assert(CE->getOpcode() == llvm::Instruction::BitCast); - Entry = CE->getOperand(0); + if (!GV) { + llvm::Constant *C = + GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer*/ true); + + // Strip off a bitcast if we got one back. + if (auto *CE = dyn_cast<llvm::ConstantExpr>(C)) { + assert(CE->getOpcode() == llvm::Instruction::BitCast); + GV = cast<llvm::GlobalValue>(CE->getOperand(0)); + } else { + GV = cast<llvm::GlobalValue>(C); + } } - if (!cast<llvm::GlobalValue>(Entry)->isDeclaration()) { + if (!GV->isDeclaration()) { getDiags().Report(D->getLocation(), diag::err_duplicate_mangled_name); return; } - if (cast<llvm::GlobalValue>(Entry)->getType()->getElementType() != Ty) { - llvm::GlobalValue *OldFn = cast<llvm::GlobalValue>(Entry); - + if (GV->getType()->getElementType() != Ty) { // If the types mismatch then we have to rewrite the definition. - assert(OldFn->isDeclaration() && - "Shouldn't replace non-declaration"); + assert(GV->isDeclaration() && "Shouldn't replace non-declaration"); // F is the Function* for the one with the wrong type, we must make a new // Function* and update everything that used F (a declaration) with the new @@ -2123,8 +2280,8 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD) { // (e.g. "int f()") and then a definition of a different type // (e.g. "int f(int x)"). Move the old function aside so that it // doesn't interfere with GetAddrOfFunction. - OldFn->setName(StringRef()); - llvm::Function *NewFn = cast<llvm::Function>(GetAddrOfFunction(GD, Ty)); + GV->setName(StringRef()); + auto *NewFn = cast<llvm::Function>(GetAddrOfFunction(GD, Ty)); // This might be an implementation of a function without a // prototype, in which case, try to do special replacement of @@ -2133,39 +2290,39 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD) { // so as to make a direct call, which makes the inliner happier // and suppresses a number of optimizer warnings (!) about // dropping arguments. - if (!OldFn->use_empty()) { - ReplaceUsesOfNonProtoTypeWithRealFunction(OldFn, NewFn); - OldFn->removeDeadConstantUsers(); + if (!GV->use_empty()) { + ReplaceUsesOfNonProtoTypeWithRealFunction(GV, NewFn); + GV->removeDeadConstantUsers(); } // Replace uses of F with the Function we will endow with a body. - if (!Entry->use_empty()) { + if (!GV->use_empty()) { llvm::Constant *NewPtrForOldDecl = - llvm::ConstantExpr::getBitCast(NewFn, Entry->getType()); - Entry->replaceAllUsesWith(NewPtrForOldDecl); + llvm::ConstantExpr::getBitCast(NewFn, GV->getType()); + GV->replaceAllUsesWith(NewPtrForOldDecl); } // Ok, delete the old function now, which is dead. - OldFn->eraseFromParent(); + GV->eraseFromParent(); - Entry = NewFn; + GV = NewFn; } // We need to set linkage and visibility on the function before // generating code for it because various parts of IR generation // want to propagate this information down (e.g. to local static // declarations). 
- llvm::Function *Fn = cast<llvm::Function>(Entry); + auto *Fn = cast<llvm::Function>(GV); setFunctionLinkage(GD, Fn); - // FIXME: this is redundant with part of SetFunctionDefinitionAttributes + // FIXME: this is redundant with part of setFunctionDefinitionAttributes setGlobalVisibility(Fn, D); MaybeHandleStaticInExternC(D, Fn); CodeGenFunction(*this).GenerateCode(D, Fn, FI); - SetFunctionDefinitionAttributes(D, Fn); + setFunctionDefinitionAttributes(D, Fn); SetLLVMFunctionAttributesForDefinition(D, Fn); if (const ConstructorAttr *CA = D->getAttr<ConstructorAttr>()) @@ -2177,7 +2334,7 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD) { } void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { - const ValueDecl *D = cast<ValueDecl>(GD.getDecl()); + const auto *D = cast<ValueDecl>(GD.getDecl()); const AliasAttr *AA = D->getAttr<AliasAttr>(); assert(AA && "Not an alias?"); @@ -2201,15 +2358,20 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { /*ForVTable=*/false); else Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), - llvm::PointerType::getUnqual(DeclTy), 0); + llvm::PointerType::getUnqual(DeclTy), + nullptr); // Create the new alias itself, but don't set a name yet. - llvm::GlobalValue *GA = - new llvm::GlobalAlias(Aliasee->getType(), - llvm::Function::ExternalLinkage, - "", Aliasee, &getModule()); + auto *GA = llvm::GlobalAlias::create( + cast<llvm::PointerType>(Aliasee->getType())->getElementType(), 0, + llvm::Function::ExternalLinkage, "", Aliasee, &getModule()); if (Entry) { + if (GA->getAliasee() == Entry) { + Diags.Report(AA->getLocation(), diag::err_cyclic_alias); + return; + } + assert(Entry->isDeclaration()); // If there is a declaration in the module, then we had an extern followed @@ -2232,12 +2394,12 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { // specialization of the attributes which may be set on a global // variable/function. if (D->hasAttr<DLLExportAttr>()) { - if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) { + if (const auto *FD = dyn_cast<FunctionDecl>(D)) { // The dllexport attribute is ignored for undefined symbols. if (FD->hasBody()) - GA->setLinkage(llvm::Function::DLLExportLinkage); + GA->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); } else { - GA->setLinkage(llvm::Function::DLLExportLinkage); + GA->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); } } else if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() || @@ -2330,8 +2492,7 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { QualType CFTy = getContext().getCFConstantStringType(); - llvm::StructType *STy = - cast<llvm::StructType>(getTypes().ConvertType(CFTy)); + auto *STy = cast<llvm::StructType>(getTypes().ConvertType(CFTy)); llvm::Constant *Fields[4]; @@ -2344,7 +2505,7 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { llvm::ConstantInt::get(Ty, 0x07C8); // String pointer. - llvm::Constant *C = 0; + llvm::Constant *C = nullptr; if (isUTF16) { ArrayRef<uint16_t> Arr = llvm::makeArrayRef<uint16_t>(reinterpret_cast<uint16_t*>( @@ -2355,30 +2516,25 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { C = llvm::ConstantDataArray::getString(VMContext, Entry.getKey()); } - llvm::GlobalValue::LinkageTypes Linkage; - if (isUTF16) - // FIXME: why do utf strings get "_" labels instead of "L" labels? - Linkage = llvm::GlobalValue::InternalLinkage; - else - // FIXME: With OS X ld 123.2 (xcode 4) and LTO we would get a linker error - // when using private linkage. 
It is not clear if this is a bug in ld - // or a reasonable new restriction. - Linkage = llvm::GlobalValue::LinkerPrivateLinkage; - // Note: -fwritable-strings doesn't make the backing store strings of // CFStrings writable. (See <rdar://problem/10657500>) - llvm::GlobalVariable *GV = - new llvm::GlobalVariable(getModule(), C->getType(), /*isConstant=*/true, - Linkage, C, ".str"); + auto *GV = + new llvm::GlobalVariable(getModule(), C->getType(), /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, C, ".str"); GV->setUnnamedAddr(true); // Don't enforce the target's minimum global alignment, since the only use // of the string is via this class initializer. + // FIXME: We set the section explicitly to avoid a bug in ld64 224.1. Without + // it LLVM can merge the string with a non unnamed_addr one during LTO. Doing + // that changes the section it ends in, which surprises ld64. if (isUTF16) { CharUnits Align = getContext().getTypeAlignInChars(getContext().ShortTy); GV->setAlignment(Align.getQuantity()); + GV->setSection("__TEXT,__ustring"); } else { CharUnits Align = getContext().getTypeAlignInChars(getContext().CharTy); GV->setAlignment(Align.getQuantity()); + GV->setSection("__TEXT,__cstring,cstring_literals"); } // String. @@ -2397,23 +2553,12 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { GV = new llvm::GlobalVariable(getModule(), C->getType(), true, llvm::GlobalVariable::PrivateLinkage, C, "_unnamed_cfstring_"); - if (const char *Sect = getTarget().getCFStringSection()) - GV->setSection(Sect); + GV->setSection("__DATA,__cfstring"); Entry.setValue(GV); return GV; } -static RecordDecl * -CreateRecordDecl(const ASTContext &Ctx, RecordDecl::TagKind TK, - DeclContext *DC, IdentifierInfo *Id) { - SourceLocation Loc; - if (Ctx.getLangOpts().CPlusPlus) - return CXXRecordDecl::Create(Ctx, TK, DC, Loc, Loc, Id); - else - return RecordDecl::Create(Ctx, TK, DC, Loc, Loc, Id); -} - llvm::Constant * CodeGenModule::GetAddrOfConstantString(const StringLiteral *Literal) { unsigned StringLength = 0; @@ -2456,9 +2601,7 @@ CodeGenModule::GetAddrOfConstantString(const StringLiteral *Literal) { if (!NSConstantStringType) { // Construct the type for a constant NSString. - RecordDecl *D = CreateRecordDecl(Context, TTK_Struct, - Context.getTranslationUnitDecl(), - &Context.Idents.get("__builtin_NSString")); + RecordDecl *D = Context.buildImplicitRecord("__builtin_NSString"); D->startDefinition(); QualType FieldTypes[3]; @@ -2474,9 +2617,9 @@ CodeGenModule::GetAddrOfConstantString(const StringLiteral *Literal) { for (unsigned i = 0; i < 3; ++i) { FieldDecl *Field = FieldDecl::Create(Context, D, SourceLocation(), - SourceLocation(), 0, - FieldTypes[i], /*TInfo=*/0, - /*BitWidth=*/0, + SourceLocation(), nullptr, + FieldTypes[i], /*TInfo=*/nullptr, + /*BitWidth=*/nullptr, /*Mutable=*/false, ICIS_NoInit); Field->setAccess(AS_public); @@ -2501,10 +2644,9 @@ CodeGenModule::GetAddrOfConstantString(const StringLiteral *Literal) { bool isConstant; Linkage = llvm::GlobalValue::PrivateLinkage; isConstant = !LangOpts.WritableStrings; - - llvm::GlobalVariable *GV = - new llvm::GlobalVariable(getModule(), C->getType(), isConstant, Linkage, C, - ".str"); + + auto *GV = new llvm::GlobalVariable(getModule(), C->getType(), isConstant, + Linkage, C, ".str"); GV->setUnnamedAddr(true); // Don't enforce the target's minimum global alignment, since the only use // of the string is via this class initializer. 
@@ -2521,12 +2663,13 @@ CodeGenModule::GetAddrOfConstantString(const StringLiteral *Literal) { GV = new llvm::GlobalVariable(getModule(), C->getType(), true, llvm::GlobalVariable::PrivateLinkage, C, "_unnamed_nsstring_"); + const char *NSStringSection = "__OBJC,__cstring_object,regular,no_dead_strip"; + const char *NSStringNonFragileABISection = + "__DATA,__objc_stringobj,regular,no_dead_strip"; // FIXME. Fix section. - if (const char *Sect = - LangOpts.ObjCRuntime.isNonFragile() - ? getTarget().getNSStringNonFragileABISection() - : getTarget().getNSStringSection()) - GV->setSection(Sect); + GV->setSection(LangOpts.ObjCRuntime.isNonFragile() + ? NSStringNonFragileABISection + : NSStringSection); Entry.setValue(GV); return GV; @@ -2534,9 +2677,7 @@ CodeGenModule::GetAddrOfConstantString(const StringLiteral *Literal) { QualType CodeGenModule::getObjCFastEnumerationStateType() { if (ObjCFastEnumerationStateType.isNull()) { - RecordDecl *D = CreateRecordDecl(Context, TTK_Struct, - Context.getTranslationUnitDecl(), - &Context.Idents.get("__objcFastEnumerationState")); + RecordDecl *D = Context.buildImplicitRecord("__objcFastEnumerationState"); D->startDefinition(); QualType FieldTypes[] = { @@ -2551,9 +2692,9 @@ QualType CodeGenModule::getObjCFastEnumerationStateType() { FieldDecl *Field = FieldDecl::Create(Context, D, SourceLocation(), - SourceLocation(), 0, - FieldTypes[i], /*TInfo=*/0, - /*BitWidth=*/0, + SourceLocation(), nullptr, + FieldTypes[i], /*TInfo=*/nullptr, + /*BitWidth=*/nullptr, /*Mutable=*/false, ICIS_NoInit); Field->setAccess(AS_public); @@ -2581,9 +2722,8 @@ CodeGenModule::GetConstantArrayFromStringLiteral(const StringLiteral *E) { Str.resize(CAT->getSize().getZExtValue()); return llvm::ConstantDataArray::getString(VMContext, Str, false); } - - llvm::ArrayType *AType = - cast<llvm::ArrayType>(getTypes().ConvertType(E->getType())); + + auto *AType = cast<llvm::ArrayType>(getTypes().ConvertType(E->getType())); llvm::Type *ElemTy = AType->getElementType(); unsigned NumElements = AType->getNumElements(); @@ -2608,36 +2748,73 @@ CodeGenModule::GetConstantArrayFromStringLiteral(const StringLiteral *E) { return llvm::ConstantDataArray::get(VMContext, Elements); } +static llvm::GlobalVariable * +GenerateStringLiteral(llvm::Constant *C, llvm::GlobalValue::LinkageTypes LT, + CodeGenModule &CGM, StringRef GlobalName, + unsigned Alignment) { + // OpenCL v1.2 s6.5.3: a string literal is in the constant address space. + unsigned AddrSpace = 0; + if (CGM.getLangOpts().OpenCL) + AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant); + + // Create a global variable for this string + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), C->getType(), !CGM.getLangOpts().WritableStrings, LT, C, + GlobalName, nullptr, llvm::GlobalVariable::NotThreadLocal, AddrSpace); + GV->setAlignment(Alignment); + GV->setUnnamedAddr(true); + return GV; +} + /// GetAddrOfConstantStringFromLiteral - Return a pointer to a /// constant array for the given string literal. -llvm::Constant * +llvm::GlobalVariable * CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S) { - CharUnits Align = getContext().getAlignOfGlobalVarInChars(S->getType()); - if (S->isAscii() || S->isUTF8()) { - SmallString<64> Str(S->getString()); - - // Resize the string to the right size, which is indicated by its type. 
- const ConstantArrayType *CAT = Context.getAsConstantArrayType(S->getType()); - Str.resize(CAT->getSize().getZExtValue()); - return GetAddrOfConstantString(Str, /*GlobalName*/ 0, Align.getQuantity()); - } + auto Alignment = + getContext().getAlignOfGlobalVarInChars(S->getType()).getQuantity(); - // FIXME: the following does not memoize wide strings. llvm::Constant *C = GetConstantArrayFromStringLiteral(S); - llvm::GlobalVariable *GV = - new llvm::GlobalVariable(getModule(),C->getType(), - !LangOpts.WritableStrings, - llvm::GlobalValue::PrivateLinkage, - C,".str"); + llvm::GlobalVariable **Entry = nullptr; + if (!LangOpts.WritableStrings) { + Entry = &ConstantStringMap[C]; + if (auto GV = *Entry) { + if (Alignment > GV->getAlignment()) + GV->setAlignment(Alignment); + return GV; + } + } - GV->setAlignment(Align.getQuantity()); - GV->setUnnamedAddr(true); + SmallString<256> MangledNameBuffer; + StringRef GlobalVariableName; + llvm::GlobalValue::LinkageTypes LT; + + // Mangle the string literal if the ABI allows for it. However, we cannot + // do this if we are compiling with ASan or -fwritable-strings because they + // rely on strings having normal linkage. + if (!LangOpts.WritableStrings && !LangOpts.Sanitize.Address && + getCXXABI().getMangleContext().shouldMangleStringLiteral(S)) { + llvm::raw_svector_ostream Out(MangledNameBuffer); + getCXXABI().getMangleContext().mangleStringLiteral(S, Out); + Out.flush(); + + LT = llvm::GlobalValue::LinkOnceODRLinkage; + GlobalVariableName = MangledNameBuffer; + } else { + LT = llvm::GlobalValue::PrivateLinkage; + GlobalVariableName = ".str"; + } + + auto GV = GenerateStringLiteral(C, LT, *this, GlobalVariableName, Alignment); + if (Entry) + *Entry = GV; + + reportGlobalToASan(GV, S->getStrTokenLoc(0), "<string literal>"); return GV; } /// GetAddrOfConstantStringFromObjCEncode - Return a pointer to a constant /// array for the given ObjCEncodeExpr node. -llvm::Constant * +llvm::GlobalVariable * CodeGenModule::GetAddrOfConstantStringFromObjCEncode(const ObjCEncodeExpr *E) { std::string Str; getContext().getObjCEncodingForType(E->getEncodedType(), Str); @@ -2645,87 +2822,48 @@ CodeGenModule::GetAddrOfConstantStringFromObjCEncode(const ObjCEncodeExpr *E) { return GetAddrOfConstantCString(Str); } - -/// GenerateWritableString -- Creates storage for a string literal. -static llvm::GlobalVariable *GenerateStringLiteral(StringRef str, - bool constant, - CodeGenModule &CGM, - const char *GlobalName, - unsigned Alignment) { - // Create Constant for this string literal. Don't add a '\0'. - llvm::Constant *C = - llvm::ConstantDataArray::getString(CGM.getLLVMContext(), str, false); - - // OpenCL v1.1 s6.5.3: a string literal is in the constant address space. - unsigned AddrSpace = 0; - if (CGM.getLangOpts().OpenCL) - AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant); - - // Create a global variable for this string - llvm::GlobalVariable *GV = new llvm::GlobalVariable( - CGM.getModule(), C->getType(), constant, - llvm::GlobalValue::PrivateLinkage, C, GlobalName, 0, - llvm::GlobalVariable::NotThreadLocal, AddrSpace); - GV->setAlignment(Alignment); - GV->setUnnamedAddr(true); - return GV; -} - -/// GetAddrOfConstantString - Returns a pointer to a character array -/// containing the literal. This contents are exactly that of the -/// given string, i.e. it will not be null terminated automatically; -/// see GetAddrOfConstantCString. 
Note that whether the result is -/// actually a pointer to an LLVM constant depends on -/// Feature.WriteableStrings. -/// +/// GetAddrOfConstantCString - Returns a pointer to a character array containing +/// the literal and a terminating '\0' character. /// The result has pointer to array type. -llvm::Constant *CodeGenModule::GetAddrOfConstantString(StringRef Str, - const char *GlobalName, - unsigned Alignment) { - // Get the default prefix if a name wasn't specified. - if (!GlobalName) - GlobalName = ".str"; +llvm::GlobalVariable *CodeGenModule::GetAddrOfConstantCString( + const std::string &Str, const char *GlobalName, unsigned Alignment) { + StringRef StrWithNull(Str.c_str(), Str.size() + 1); + if (Alignment == 0) { + Alignment = getContext() + .getAlignOfGlobalVarInChars(getContext().CharTy) + .getQuantity(); + } - if (Alignment == 0) - Alignment = getContext().getAlignOfGlobalVarInChars(getContext().CharTy) - .getQuantity(); + llvm::Constant *C = + llvm::ConstantDataArray::getString(getLLVMContext(), StrWithNull, false); // Don't share any string literals if strings aren't constant. - if (LangOpts.WritableStrings) - return GenerateStringLiteral(Str, false, *this, GlobalName, Alignment); - - llvm::StringMapEntry<llvm::GlobalVariable *> &Entry = - ConstantStringMap.GetOrCreateValue(Str); - - if (llvm::GlobalVariable *GV = Entry.getValue()) { - if (Alignment > GV->getAlignment()) { - GV->setAlignment(Alignment); + llvm::GlobalVariable **Entry = nullptr; + if (!LangOpts.WritableStrings) { + Entry = &ConstantStringMap[C]; + if (auto GV = *Entry) { + if (Alignment > GV->getAlignment()) + GV->setAlignment(Alignment); + return GV; } - return GV; } + // Get the default prefix if a name wasn't specified. + if (!GlobalName) + GlobalName = ".str"; // Create a global variable for this. - llvm::GlobalVariable *GV = GenerateStringLiteral(Str, true, *this, GlobalName, - Alignment); - Entry.setValue(GV); + auto GV = GenerateStringLiteral(C, llvm::GlobalValue::PrivateLinkage, *this, + GlobalName, Alignment); + if (Entry) + *Entry = GV; return GV; } -/// GetAddrOfConstantCString - Returns a pointer to a character -/// array containing the literal and a terminating '\0' -/// character. The result has pointer to array type. -llvm::Constant *CodeGenModule::GetAddrOfConstantCString(const std::string &Str, - const char *GlobalName, - unsigned Alignment) { - StringRef StrWithNull(Str.c_str(), Str.size() + 1); - return GetAddrOfConstantString(StrWithNull, GlobalName, Alignment); -} - llvm::Constant *CodeGenModule::GetAddrOfGlobalTemporary( const MaterializeTemporaryExpr *E, const Expr *Init) { assert((E->getStorageDuration() == SD_Static || E->getStorageDuration() == SD_Thread) && "not a global temporary"); - const VarDecl *VD = cast<VarDecl>(E->getExtendingDecl()); + const auto *VD = cast<VarDecl>(E->getExtendingDecl()); // If we're not materializing a subobject of the temporary, keep the // cv-qualifiers from the type of the MaterializeTemporaryExpr. @@ -2742,10 +2880,11 @@ llvm::Constant *CodeGenModule::GetAddrOfGlobalTemporary( // we also need to make the temporaries externally-visible). SmallString<256> Name; llvm::raw_svector_ostream Out(Name); - getCXXABI().getMangleContext().mangleReferenceTemporary(VD, Out); + getCXXABI().getMangleContext().mangleReferenceTemporary( + VD, E->getManglingNumber(), Out); Out.flush(); - APValue *Value = 0; + APValue *Value = nullptr; if (E->getStorageDuration() == SD_Static) { // We might have a cached constant initializer for this temporary. 
Note // that this might have a different value from the value computed by @@ -2753,7 +2892,7 @@ llvm::Constant *CodeGenModule::GetAddrOfGlobalTemporary( // modifies the temporary. Value = getContext().getMaterializedTemporaryValue(E, false); if (Value && Value->isUninit()) - Value = 0; + Value = nullptr; } // Try evaluating it now, it might have a constant initializer. @@ -2762,12 +2901,12 @@ llvm::Constant *CodeGenModule::GetAddrOfGlobalTemporary( !EvalResult.hasSideEffects()) Value = &EvalResult.Val; - llvm::Constant *InitialValue = 0; + llvm::Constant *InitialValue = nullptr; bool Constant = false; llvm::Type *Type; if (Value) { // The temporary has a constant initializer, use it. - InitialValue = EmitConstantValue(*Value, MaterializedType, 0); + InitialValue = EmitConstantValue(*Value, MaterializedType, nullptr); Constant = isTypeConstant(MaterializedType, /*ExcludeCtor*/Value); Type = InitialValue->getType(); } else { @@ -2777,10 +2916,19 @@ llvm::Constant *CodeGenModule::GetAddrOfGlobalTemporary( } // Create a global variable for this lifetime-extended temporary. - llvm::GlobalVariable *GV = - new llvm::GlobalVariable(getModule(), Type, Constant, - llvm::GlobalValue::PrivateLinkage, - InitialValue, Name.c_str()); + llvm::GlobalValue::LinkageTypes Linkage = + getLLVMLinkageVarDefinition(VD, Constant); + // There is no need for this temporary to have global linkage if the global + // variable has external linkage. + if (Linkage == llvm::GlobalVariable::ExternalLinkage) + Linkage = llvm::GlobalVariable::PrivateLinkage; + unsigned AddrSpace = GetGlobalVarAddressSpace( + VD, getContext().getTargetAddressSpace(MaterializedType)); + auto *GV = new llvm::GlobalVariable( + getModule(), Type, Constant, Linkage, InitialValue, Name.c_str(), + /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, + AddrSpace); + setGlobalVisibility(GV, VD); GV->setAlignment( getContext().getTypeAlignInChars(MaterializedType).getQuantity()); if (VD->getTLSKind()) @@ -2793,10 +2941,7 @@ llvm::Constant *CodeGenModule::GetAddrOfGlobalTemporary( /// properties for an implementation. void CodeGenModule::EmitObjCPropertyImplementations(const ObjCImplementationDecl *D) { - for (ObjCImplementationDecl::propimpl_iterator - i = D->propimpl_begin(), e = D->propimpl_end(); i != e; ++i) { - ObjCPropertyImplDecl *PID = *i; - + for (const auto *PID : D->property_impls()) { // Dynamic is just for type-checking. 
if (PID->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize) { ObjCPropertyDecl *PD = PID->getPropertyDecl(); @@ -2836,7 +2981,7 @@ void CodeGenModule::EmitObjCIvarInitializations(ObjCImplementationDecl *D) { Selector cxxSelector = getContext().Selectors.getSelector(0, &II); ObjCMethodDecl *DTORMethod = ObjCMethodDecl::Create(getContext(), D->getLocation(), D->getLocation(), - cxxSelector, getContext().VoidTy, 0, D, + cxxSelector, getContext().VoidTy, nullptr, D, /*isInstance=*/true, /*isVariadic=*/false, /*isPropertyAccessor=*/true, /*isImplicitlyDeclared=*/true, /*isDefined=*/false, ObjCMethodDecl::Required); @@ -2857,8 +3002,8 @@ void CodeGenModule::EmitObjCIvarInitializations(ObjCImplementationDecl *D) { D->getLocation(), D->getLocation(), cxxSelector, - getContext().getObjCIdType(), 0, - D, /*isInstance=*/true, + getContext().getObjCIdType(), + nullptr, D, /*isInstance=*/true, /*isVariadic=*/false, /*isPropertyAccessor=*/true, /*isImplicitlyDeclared=*/true, @@ -2871,13 +3016,12 @@ void CodeGenModule::EmitObjCIvarInitializations(ObjCImplementationDecl *D) { /// EmitNamespace - Emit all declarations in a namespace. void CodeGenModule::EmitNamespace(const NamespaceDecl *ND) { - for (RecordDecl::decl_iterator I = ND->decls_begin(), E = ND->decls_end(); - I != E; ++I) { - if (const VarDecl *VD = dyn_cast<VarDecl>(*I)) + for (auto *I : ND->decls()) { + if (const auto *VD = dyn_cast<VarDecl>(I)) if (VD->getTemplateSpecializationKind() != TSK_ExplicitSpecialization && VD->getTemplateSpecializationKind() != TSK_Undeclared) continue; - EmitTopLevelDecl(*I); + EmitTopLevelDecl(I); } } @@ -2889,17 +3033,14 @@ void CodeGenModule::EmitLinkageSpec(const LinkageSpecDecl *LSD) { return; } - for (RecordDecl::decl_iterator I = LSD->decls_begin(), E = LSD->decls_end(); - I != E; ++I) { + for (auto *I : LSD->decls()) { // Meta-data for ObjC class includes references to implemented methods. // Generate class's method definitions first. - if (ObjCImplDecl *OID = dyn_cast<ObjCImplDecl>(*I)) { - for (ObjCContainerDecl::method_iterator M = OID->meth_begin(), - MEnd = OID->meth_end(); - M != MEnd; ++M) - EmitTopLevelDecl(*M); + if (auto *OID = dyn_cast<ObjCImplDecl>(I)) { + for (auto *M : OID->methods()) + EmitTopLevelDecl(M); } - EmitTopLevelDecl(*I); + EmitTopLevelDecl(I); } } @@ -2940,7 +3081,6 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { break; // No code generation needed. 
case Decl::UsingShadow: - case Decl::Using: case Decl::ClassTemplate: case Decl::VarTemplate: case Decl::VarTemplatePartialSpecialization: @@ -2949,6 +3089,10 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { case Decl::Block: case Decl::Empty: break; + case Decl::Using: // using X; [C++] + if (CGDebugInfo *DI = getModuleDebugInfo()) + DI->EmitUsingDecl(cast<UsingDecl>(*D)); + return; case Decl::NamespaceAlias: if (CGDebugInfo *DI = getModuleDebugInfo()) DI->EmitNamespaceAlias(cast<NamespaceAliasDecl>(*D)); @@ -2983,7 +3127,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { break; case Decl::ObjCProtocol: { - ObjCProtocolDecl *Proto = cast<ObjCProtocolDecl>(D); + auto *Proto = cast<ObjCProtocolDecl>(D); if (Proto->isThisDeclarationADefinition()) ObjCRuntime->GenerateProtocol(Proto); break; @@ -2996,7 +3140,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { break; case Decl::ObjCImplementation: { - ObjCImplementationDecl *OMD = cast<ObjCImplementationDecl>(D); + auto *OMD = cast<ObjCImplementationDecl>(D); EmitObjCPropertyImplementations(OMD); EmitObjCIvarInitializations(OMD); ObjCRuntime->GenerateClass(OMD); @@ -3008,7 +3152,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { break; } case Decl::ObjCMethod: { - ObjCMethodDecl *OMD = cast<ObjCMethodDecl>(D); + auto *OMD = cast<ObjCMethodDecl>(D); // If this is not a prototype, emit the body. if (OMD->getBody()) CodeGenFunction(*this).GenerateObjCMethod(OMD); @@ -3023,7 +3167,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { break; case Decl::FileScopeAsm: { - FileScopeAsmDecl *AD = cast<FileScopeAsmDecl>(D); + auto *AD = cast<FileScopeAsmDecl>(D); StringRef AsmString = AD->getAsmString()->getString(); const std::string &S = getModule().getModuleInlineAsm(); @@ -3037,7 +3181,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { } case Decl::Import: { - ImportDecl *Import = cast<ImportDecl>(D); + auto *Import = cast<ImportDecl>(D); // Ignore import declarations that come from imported modules. if (clang::Module *Owner = Import->getOwningModule()) { @@ -3048,7 +3192,14 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { ImportedModules.insert(Import->getImportedModule()); break; - } + } + + case Decl::ClassTemplateSpecialization: { + const auto *Spec = cast<ClassTemplateSpecializationDecl>(D); + if (DebugInfo && + Spec->getSpecializationKind() == TSK_ExplicitInstantiationDefinition) + DebugInfo->completeTemplateDefinition(*Spec); + } default: // Make sure we handled everything we should, every other kind is a @@ -3094,11 +3245,19 @@ void CodeGenModule::EmitStaticExternCAliases() { IdentifierInfo *Name = I->first; llvm::GlobalValue *Val = I->second; if (Val && !getModule().getNamedValue(Name->getName())) - AddUsedGlobal(new llvm::GlobalAlias(Val->getType(), Val->getLinkage(), - Name->getName(), Val, &getModule())); + addUsedGlobal(llvm::GlobalAlias::create(Name->getName(), Val)); } } +bool CodeGenModule::lookupRepresentativeDecl(StringRef MangledName, + GlobalDecl &Result) const { + auto Res = Manglings.find(MangledName); + if (Res == Manglings.end()) + return false; + Result = Res->getValue(); + return true; +} + /// Emits metadata nodes associating all the global values in the /// current module with the Decls they came from. This is useful for /// projects using IR gen as a subroutine. @@ -3107,14 +3266,12 @@ void CodeGenModule::EmitStaticExternCAliases() { /// with an llvm::GlobalValue, we create a global named metadata /// with the name 'clang.global.decl.ptrs'. 
void CodeGenModule::EmitDeclMetadata() { - llvm::NamedMDNode *GlobalMetadata = 0; + llvm::NamedMDNode *GlobalMetadata = nullptr; // StaticLocalDeclMap - for (llvm::DenseMap<GlobalDecl,StringRef>::iterator - I = MangledDeclNames.begin(), E = MangledDeclNames.end(); - I != E; ++I) { - llvm::GlobalValue *Addr = getModule().getNamedValue(I->second); - EmitGlobalDeclMetadata(*this, GlobalMetadata, I->first, Addr); + for (auto &I : MangledDeclNames) { + llvm::GlobalValue *Addr = getModule().getNamedValue(I.second); + EmitGlobalDeclMetadata(*this, GlobalMetadata, I.first, Addr); } } @@ -3128,17 +3285,15 @@ void CodeGenFunction::EmitDeclMetadata() { // Find the unique metadata ID for this name. unsigned DeclPtrKind = Context.getMDKindID("clang.decl.ptr"); - llvm::NamedMDNode *GlobalMetadata = 0; - - for (llvm::DenseMap<const Decl*, llvm::Value*>::iterator - I = LocalDeclMap.begin(), E = LocalDeclMap.end(); I != E; ++I) { - const Decl *D = I->first; - llvm::Value *Addr = I->second; + llvm::NamedMDNode *GlobalMetadata = nullptr; - if (llvm::AllocaInst *Alloca = dyn_cast<llvm::AllocaInst>(Addr)) { + for (auto &I : LocalDeclMap) { + const Decl *D = I.first; + llvm::Value *Addr = I.second; + if (auto *Alloca = dyn_cast<llvm::AllocaInst>(Addr)) { llvm::Value *DAddr = GetPointerConstant(getLLVMContext(), D); Alloca->setMetadata(DeclPtrKind, llvm::MDNode::get(Context, DAddr)); - } else if (llvm::GlobalValue *GV = dyn_cast<llvm::GlobalValue>(Addr)) { + } else if (auto *GV = dyn_cast<llvm::GlobalValue>(Addr)) { GlobalDecl GD = GlobalDecl(cast<VarDecl>(D)); EmitGlobalDeclMetadata(CGM, GlobalMetadata, GD, GV); } @@ -3157,6 +3312,14 @@ void CodeGenModule::EmitVersionIdentMetadata() { IdentMetadata->addOperand(llvm::MDNode::get(Ctx, IdentNode)); } +void CodeGenModule::EmitTargetMetadata() { + for (auto &I : MangledDeclNames) { + const Decl *D = I.first.getDecl()->getMostRecentDecl(); + llvm::GlobalValue *GV = GetGlobalValue(I.second); + getTargetCodeGenInfo().emitTargetMD(D, GV, *this); + } +} + void CodeGenModule::EmitCoverageFile() { if (!getCodeGenOpts().CoverageFile.empty()) { if (llvm::NamedMDNode *CUNode = TheModule.getNamedMetadata("llvm.dbg.cu")) { @@ -3200,3 +3363,19 @@ llvm::Constant *CodeGenModule::EmitUuidofInitializer(StringRef Uuid, return llvm::ConstantStruct::getAnon(Fields); } + +llvm::Constant *CodeGenModule::GetAddrOfRTTIDescriptor(QualType Ty, + bool ForEH) { + // Return a bogus pointer if RTTI is disabled, unless it's for EH. + // FIXME: should we even be calling this method if RTTI is disabled + // and it's not for EH? 
+ if (!ForEH && !getLangOpts().RTTI) + return llvm::Constant::getNullValue(Int8PtrTy); + + if (ForEH && Ty->isObjCObjectPointerType() && + LangOpts.ObjCRuntime.isGNUFamily()) + return ObjCRuntime->GetEHType(Ty); + + return getCXXABI().getAddrOfRTTIDescriptor(Ty); +} + diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h index c16122405d43..9533a8dabb20 100644 --- a/lib/CodeGen/CodeGenModule.h +++ b/lib/CodeGen/CodeGenModule.h @@ -16,6 +16,7 @@ #include "CGVTables.h" #include "CodeGenTypes.h" +#include "SanitizerBlacklist.h" #include "clang/AST/Attr.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" @@ -30,129 +31,124 @@ #include "llvm/ADT/StringMap.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Module.h" -#include "llvm/Support/ValueHandle.h" -#include "llvm/Transforms/Utils/SpecialCaseList.h" +#include "llvm/IR/ValueHandle.h" namespace llvm { - class Module; - class Constant; - class ConstantInt; - class Function; - class GlobalValue; - class DataLayout; - class FunctionType; - class LLVMContext; +class Module; +class Constant; +class ConstantInt; +class Function; +class GlobalValue; +class DataLayout; +class FunctionType; +class LLVMContext; +class IndexedInstrProfReader; } namespace clang { - class TargetCodeGenInfo; - class ASTContext; - class AtomicType; - class FunctionDecl; - class IdentifierInfo; - class ObjCMethodDecl; - class ObjCImplementationDecl; - class ObjCCategoryImplDecl; - class ObjCProtocolDecl; - class ObjCEncodeExpr; - class BlockExpr; - class CharUnits; - class Decl; - class Expr; - class Stmt; - class InitListExpr; - class StringLiteral; - class NamedDecl; - class ValueDecl; - class VarDecl; - class LangOptions; - class CodeGenOptions; - class DiagnosticsEngine; - class AnnotateAttr; - class CXXDestructorDecl; - class MangleBuffer; - class Module; +class TargetCodeGenInfo; +class ASTContext; +class AtomicType; +class FunctionDecl; +class IdentifierInfo; +class ObjCMethodDecl; +class ObjCImplementationDecl; +class ObjCCategoryImplDecl; +class ObjCProtocolDecl; +class ObjCEncodeExpr; +class BlockExpr; +class CharUnits; +class Decl; +class Expr; +class Stmt; +class InitListExpr; +class StringLiteral; +class NamedDecl; +class ValueDecl; +class VarDecl; +class LangOptions; +class CodeGenOptions; +class DiagnosticsEngine; +class AnnotateAttr; +class CXXDestructorDecl; +class Module; namespace CodeGen { - class CallArgList; - class CodeGenFunction; - class CodeGenTBAA; - class CGCXXABI; - class CGDebugInfo; - class CGObjCRuntime; - class CGOpenCLRuntime; - class CGCUDARuntime; - class BlockFieldFlags; - class FunctionArgList; - - struct OrderGlobalInits { - unsigned int priority; - unsigned int lex_order; - OrderGlobalInits(unsigned int p, unsigned int l) +class CallArgList; +class CodeGenFunction; +class CodeGenTBAA; +class CGCXXABI; +class CGDebugInfo; +class CGObjCRuntime; +class CGOpenCLRuntime; +class CGOpenMPRuntime; +class CGCUDARuntime; +class BlockFieldFlags; +class FunctionArgList; + +struct OrderGlobalInits { + unsigned int priority; + unsigned int lex_order; + OrderGlobalInits(unsigned int p, unsigned int l) : priority(p), lex_order(l) {} - - bool operator==(const OrderGlobalInits &RHS) const { - return priority == RHS.priority && - lex_order == RHS.lex_order; - } - - bool operator<(const OrderGlobalInits &RHS) const { - if (priority < RHS.priority) - return true; - - return priority == RHS.priority && lex_order < RHS.lex_order; - } + + bool operator==(const OrderGlobalInits &RHS) const { + return priority == RHS.priority 
&& lex_order == RHS.lex_order; + } + + bool operator<(const OrderGlobalInits &RHS) const { + return std::tie(priority, lex_order) < + std::tie(RHS.priority, RHS.lex_order); + } +}; + +struct CodeGenTypeCache { + /// void + llvm::Type *VoidTy; + + /// i8, i16, i32, and i64 + llvm::IntegerType *Int8Ty, *Int16Ty, *Int32Ty, *Int64Ty; + /// float, double + llvm::Type *FloatTy, *DoubleTy; + + /// int + llvm::IntegerType *IntTy; + + /// intptr_t, size_t, and ptrdiff_t, which we assume are the same size. + union { + llvm::IntegerType *IntPtrTy; + llvm::IntegerType *SizeTy; + llvm::IntegerType *PtrDiffTy; }; - struct CodeGenTypeCache { - /// void - llvm::Type *VoidTy; - - /// i8, i16, i32, and i64 - llvm::IntegerType *Int8Ty, *Int16Ty, *Int32Ty, *Int64Ty; - /// float, double - llvm::Type *FloatTy, *DoubleTy; - - /// int - llvm::IntegerType *IntTy; - - /// intptr_t, size_t, and ptrdiff_t, which we assume are the same size. - union { - llvm::IntegerType *IntPtrTy; - llvm::IntegerType *SizeTy; - llvm::IntegerType *PtrDiffTy; - }; - - /// void* in address space 0 - union { - llvm::PointerType *VoidPtrTy; - llvm::PointerType *Int8PtrTy; - }; - - /// void** in address space 0 - union { - llvm::PointerType *VoidPtrPtrTy; - llvm::PointerType *Int8PtrPtrTy; - }; - - /// The width of a pointer into the generic address space. - unsigned char PointerWidthInBits; - - /// The size and alignment of a pointer into the generic address - /// space. - union { - unsigned char PointerAlignInBytes; - unsigned char PointerSizeInBytes; - unsigned char SizeSizeInBytes; // sizeof(size_t) - }; - - llvm::CallingConv::ID RuntimeCC; - llvm::CallingConv::ID getRuntimeCC() const { - return RuntimeCC; - } + /// void* in address space 0 + union { + llvm::PointerType *VoidPtrTy; + llvm::PointerType *Int8PtrTy; }; + /// void** in address space 0 + union { + llvm::PointerType *VoidPtrPtrTy; + llvm::PointerType *Int8PtrPtrTy; + }; + + /// The width of a pointer into the generic address space. + unsigned char PointerWidthInBits; + + /// The size and alignment of a pointer into the generic address + /// space. + union { + unsigned char PointerAlignInBytes; + unsigned char PointerSizeInBytes; + unsigned char SizeSizeInBytes; // sizeof(size_t) + }; + + llvm::CallingConv::ID RuntimeCC; + llvm::CallingConv::ID getRuntimeCC() const { return RuntimeCC; } +}; + struct RREntrypoints { RREntrypoints() { memset(this, 0, sizeof(*this)); } /// void objc_autoreleasePoolPop(void*); @@ -221,13 +217,57 @@ struct ARCEntrypoints { llvm::Constant *clang_arc_use; }; -/// CodeGenModule - This class organizes the cross-function state that is used -/// while generating LLVM code. +/// This class records statistics on instrumentation based profiling. +class InstrProfStats { + uint32_t VisitedInMainFile; + uint32_t MissingInMainFile; + uint32_t Visited; + uint32_t Missing; + uint32_t Mismatched; + +public: + InstrProfStats() + : VisitedInMainFile(0), MissingInMainFile(0), Visited(0), Missing(0), + Mismatched(0) {} + /// Record that we've visited a function and whether or not that function was + /// in the main source file. + void addVisited(bool MainFile) { + if (MainFile) + ++VisitedInMainFile; + ++Visited; + } + /// Record that a function we've visited has no profile data. + void addMissing(bool MainFile) { + if (MainFile) + ++MissingInMainFile; + ++Missing; + } + /// Record that a function we've visited has mismatched profile data. 
+ void addMismatched(bool MainFile) { ++Mismatched; } + /// Whether or not the stats we've gathered indicate any potential problems. + bool hasDiagnostics() { return Missing || Mismatched; } + /// Report potential problems we've found to \c Diags. + void reportDiagnostics(DiagnosticsEngine &Diags, StringRef MainFile); +}; + +/// This class organizes the cross-function state that is used while generating +/// LLVM code. class CodeGenModule : public CodeGenTypeCache { CodeGenModule(const CodeGenModule &) LLVM_DELETED_FUNCTION; void operator=(const CodeGenModule &) LLVM_DELETED_FUNCTION; - typedef std::vector<std::pair<llvm::Constant*, int> > CtorList; + struct Structor { + Structor() : Priority(0), Initializer(nullptr), AssociatedData(nullptr) {} + Structor(int Priority, llvm::Constant *Initializer, + llvm::Constant *AssociatedData) + : Priority(Priority), Initializer(Initializer), + AssociatedData(AssociatedData) {} + int Priority; + llvm::Constant *Initializer; + llvm::Constant *AssociatedData; + }; + + typedef std::vector<Structor> CtorList; ASTContext &Context; const LangOptions &LangOpts; @@ -236,7 +276,7 @@ class CodeGenModule : public CodeGenTypeCache { DiagnosticsEngine &Diags; const llvm::DataLayout &TheDataLayout; const TargetInfo &Target; - CGCXXABI &ABI; + std::unique_ptr<CGCXXABI> ABI; llvm::LLVMContext &VMContext; CodeGenTBAA *TBAA; @@ -248,32 +288,42 @@ class CodeGenModule : public CodeGenTypeCache { // if TheTargetCodeGenInfo is NULL CodeGenTypes Types; - /// VTables - Holds information about C++ vtables. + /// Holds information about C++ vtables. CodeGenVTables VTables; CGObjCRuntime* ObjCRuntime; CGOpenCLRuntime* OpenCLRuntime; + CGOpenMPRuntime* OpenMPRuntime; CGCUDARuntime* CUDARuntime; CGDebugInfo* DebugInfo; ARCEntrypoints *ARCData; llvm::MDNode *NoObjCARCExceptionsMetadata; RREntrypoints *RRData; + std::unique_ptr<llvm::IndexedInstrProfReader> PGOReader; + InstrProfStats PGOStats; - // WeakRefReferences - A set of references that have only been seen via - // a weakref so far. This is used to remove the weak of the reference if we - // ever see a direct reference or a definition. + // A set of references that have only been seen via a weakref so far. This is + // used to remove the weak of the reference if we ever see a direct reference + // or a definition. llvm::SmallPtrSet<llvm::GlobalValue*, 10> WeakRefReferences; - /// DeferredDecls - This contains all the decls which have definitions but - /// which are deferred for emission and therefore should only be output if - /// they are actually used. If a decl is in this, then it is known to have - /// not been referenced yet. - llvm::StringMap<GlobalDecl> DeferredDecls; - - /// DeferredDeclsToEmit - This is a list of deferred decls which we have seen - /// that *are* actually referenced. These get code generated when the module - /// is done. - std::vector<GlobalDecl> DeferredDeclsToEmit; + /// This contains all the decls which have definitions but/ which are deferred + /// for emission and therefore should only be output if they are actually + /// used. If a decl is in this, then it is known to have not been referenced + /// yet. + std::map<StringRef, GlobalDecl> DeferredDecls; + + /// This is a list of deferred decls which we have seen that *are* actually + /// referenced. These get code generated when the module is done. 
+ struct DeferredGlobal { + DeferredGlobal(llvm::GlobalValue *GV, GlobalDecl GD) : GV(GV), GD(GD) {} + llvm::AssertingVH<llvm::GlobalValue> GV; + GlobalDecl GD; + }; + std::vector<DeferredGlobal> DeferredDeclsToEmit; + void addDeferredDeclToEmit(llvm::GlobalValue *GV, GlobalDecl GD) { + DeferredDeclsToEmit.push_back(DeferredGlobal(GV, GD)); + } /// List of alias we have emitted. Used to make sure that what they point to /// is defined once we get to the end of the of the translation unit. @@ -282,27 +332,27 @@ class CodeGenModule : public CodeGenTypeCache { typedef llvm::StringMap<llvm::TrackingVH<llvm::Constant> > ReplacementsTy; ReplacementsTy Replacements; - /// DeferredVTables - A queue of (optional) vtables to consider emitting. + /// A queue of (optional) vtables to consider emitting. std::vector<const CXXRecordDecl*> DeferredVTables; - /// LLVMUsed - List of global values which are required to be - /// present in the object file; bitcast to i8*. This is used for - /// forcing visibility of symbols which may otherwise be optimized - /// out. + /// List of global values which are required to be present in the object file; + /// bitcast to i8*. This is used for forcing visibility of symbols which may + /// otherwise be optimized out. std::vector<llvm::WeakVH> LLVMUsed; + std::vector<llvm::WeakVH> LLVMCompilerUsed; - /// GlobalCtors - Store the list of global constructors and their respective - /// priorities to be emitted when the translation unit is complete. + /// Store the list of global constructors and their respective priorities to + /// be emitted when the translation unit is complete. CtorList GlobalCtors; - /// GlobalDtors - Store the list of global destructors and their respective - /// priorities to be emitted when the translation unit is complete. + /// Store the list of global destructors and their respective priorities to be + /// emitted when the translation unit is complete. CtorList GlobalDtors; - /// MangledDeclNames - A map of canonical GlobalDecls to their mangled names. - llvm::DenseMap<GlobalDecl, StringRef> MangledDeclNames; - llvm::BumpPtrAllocator MangledNamesAllocator; - + /// An ordered map of canonical GlobalDecls to their mangled names. + llvm::MapVector<GlobalDecl, StringRef> MangledDeclNames; + llvm::StringMap<GlobalDecl, llvm::BumpPtrAllocator> Manglings; + /// Global annotations. std::vector<llvm::Constant*> Annotations; @@ -310,7 +360,8 @@ class CodeGenModule : public CodeGenTypeCache { llvm::StringMap<llvm::Constant*> AnnotationStrings; llvm::StringMap<llvm::Constant*> CFConstantStringMap; - llvm::StringMap<llvm::GlobalVariable*> ConstantStringMap; + + llvm::DenseMap<llvm::Constant *, llvm::GlobalVariable *> ConstantStringMap; llvm::DenseMap<const Decl*, llvm::Constant *> StaticLocalDeclMap; llvm::DenseMap<const Decl*, llvm::GlobalVariable*> StaticLocalDeclGuardMap; llvm::DenseMap<const Expr*, llvm::Constant *> MaterializedGlobalTemporaryMap; @@ -335,8 +386,7 @@ class CodeGenModule : public CodeGenTypeCache { /// before any thread_local variable in this TU is odr-used. std::vector<llvm::Constant*> CXXThreadLocalInits; - /// CXXGlobalInits - Global variables with initializers that need to run - /// before main. + /// Global variables with initializers that need to run before main. 
std::vector<llvm::Constant*> CXXGlobalInits; /// When a C++ decl with an initializer is deferred, null is @@ -354,12 +404,11 @@ class CodeGenModule : public CodeGenTypeCache { } }; - /// - Global variables with initializers whose order of initialization - /// is set by init_priority attribute. + /// Global variables with initializers whose order of initialization is set by + /// init_priority attribute. SmallVector<GlobalInitData, 8> PrioritizedCXXGlobalInits; - /// CXXGlobalDtors - Global destructor functions and arguments that need to - /// run on termination. + /// Global destructor functions and arguments that need to run on termination. std::vector<std::pair<llvm::WeakVH,llvm::Constant*> > CXXGlobalDtors; /// \brief The complete set of modules that has been imported. @@ -371,12 +420,12 @@ class CodeGenModule : public CodeGenTypeCache { /// @name Cache for Objective-C runtime types /// @{ - /// CFConstantStringClassRef - Cached reference to the class for constant - /// strings. This value has type int * but is actually an Obj-C class pointer. + /// Cached reference to the class for constant strings. This value has type + /// int * but is actually an Obj-C class pointer. llvm::WeakVH CFConstantStringClassRef; - /// ConstantStringClassRef - Cached reference to the class for constant - /// strings. This value has type int * but is actually an Obj-C class pointer. + /// Cached reference to the class for constant strings. This value has type + /// int * but is actually an Obj-C class pointer. llvm::WeakVH ConstantStringClassRef; /// \brief The LLVM type corresponding to NSConstantString. @@ -392,6 +441,7 @@ class CodeGenModule : public CodeGenTypeCache { void createObjCRuntime(); void createOpenCLRuntime(); + void createOpenMPRuntime(); void createCUDARuntime(); bool isTriviallyRecursive(const FunctionDecl *F); @@ -421,9 +471,7 @@ class CodeGenModule : public CodeGenTypeCache { GlobalDecl initializedGlobalDecl; - llvm::OwningPtr<llvm::SpecialCaseList> SanitizerBlacklist; - - const SanitizerOptions &SanOpts; + SanitizerBlacklist SanitizerBL; /// @} public: @@ -433,42 +481,51 @@ public: ~CodeGenModule(); - /// Release - Finalize LLVM code generation. + void clear(); + + /// Finalize LLVM code generation. void Release(); - /// getObjCRuntime() - Return a reference to the configured - /// Objective-C runtime. + /// Return a reference to the configured Objective-C runtime. CGObjCRuntime &getObjCRuntime() { if (!ObjCRuntime) createObjCRuntime(); return *ObjCRuntime; } - /// hasObjCRuntime() - Return true iff an Objective-C runtime has - /// been configured. + /// Return true iff an Objective-C runtime has been configured. bool hasObjCRuntime() { return !!ObjCRuntime; } - /// getOpenCLRuntime() - Return a reference to the configured OpenCL runtime. + /// Return a reference to the configured OpenCL runtime. CGOpenCLRuntime &getOpenCLRuntime() { - assert(OpenCLRuntime != 0); + assert(OpenCLRuntime != nullptr); return *OpenCLRuntime; } - /// getCUDARuntime() - Return a reference to the configured CUDA runtime. + /// Return a reference to the configured OpenMP runtime. + CGOpenMPRuntime &getOpenMPRuntime() { + assert(OpenMPRuntime != nullptr); + return *OpenMPRuntime; + } + + /// Return a reference to the configured CUDA runtime. 
CGCUDARuntime &getCUDARuntime() { - assert(CUDARuntime != 0); + assert(CUDARuntime != nullptr); return *CUDARuntime; } ARCEntrypoints &getARCEntrypoints() const { - assert(getLangOpts().ObjCAutoRefCount && ARCData != 0); + assert(getLangOpts().ObjCAutoRefCount && ARCData != nullptr); return *ARCData; } RREntrypoints &getRREntrypoints() const { - assert(RRData != 0); + assert(RRData != nullptr); return *RRData; } + InstrProfStats &getPGOStats() { return PGOStats; } + llvm::IndexedInstrProfReader *getPGOReader() const { return PGOReader.get(); } + llvm::Constant *getStaticLocalDeclAddress(const VarDecl *D) { return StaticLocalDeclMap[D]; } @@ -485,6 +542,9 @@ public: StaticLocalDeclGuardMap[D] = C; } + bool lookupRepresentativeDecl(StringRef MangledName, + GlobalDecl &Result) const; + llvm::Constant *getAtomicSetterHelperFnMap(QualType Ty) { return AtomicSetterHelperFnMap[Ty]; } @@ -501,10 +561,10 @@ public: AtomicGetterHelperFnMap[Ty] = Fn; } - llvm::Constant *getTypeDescriptor(QualType Ty) { + llvm::Constant *getTypeDescriptorFromMap(QualType Ty) { return TypeDescriptorMap[Ty]; } - void setTypeDescriptor(QualType Ty, llvm::Constant *C) { + void setTypeDescriptorInMap(QualType Ty, llvm::Constant *C) { TypeDescriptorMap[Ty] = C; } @@ -525,10 +585,10 @@ public: DiagnosticsEngine &getDiags() const { return Diags; } const llvm::DataLayout &getDataLayout() const { return TheDataLayout; } const TargetInfo &getTarget() const { return Target; } - CGCXXABI &getCXXABI() { return ABI; } + CGCXXABI &getCXXABI() const { return *ABI; } llvm::LLVMContext &getLLVMContext() { return VMContext; } - - bool shouldUseTBAA() const { return TBAA != 0; } + + bool shouldUseTBAA() const { return TBAA != nullptr; } const TargetCodeGenInfo &getTargetCodeGenInfo(); @@ -566,32 +626,16 @@ public: llvm::MDNode *TBAAInfo, bool ConvertTypeToTag = true); - /// getSize - Emit the given number of characters as a value of type size_t. + /// Emit the given number of characters as a value of type size_t. llvm::ConstantInt *getSize(CharUnits numChars); - /// setGlobalVisibility - Set the visibility for the given LLVM - /// GlobalValue. + /// Set the visibility for the given LLVM GlobalValue. void setGlobalVisibility(llvm::GlobalValue *GV, const NamedDecl *D) const; - /// setTLSMode - Set the TLS mode for the given LLVM GlobalVariable - /// for the thread-local variable declaration D. + /// Set the TLS mode for the given LLVM GlobalVariable for the thread-local + /// variable declaration D. void setTLSMode(llvm::GlobalVariable *GV, const VarDecl &D) const; - /// TypeVisibilityKind - The kind of global variable that is passed to - /// setTypeVisibility - enum TypeVisibilityKind { - TVK_ForVTT, - TVK_ForVTable, - TVK_ForConstructionVTable, - TVK_ForRTTI, - TVK_ForRTTIName - }; - - /// setTypeVisibility - Set the visibility for the given global - /// value which holds information about a type. - void setTypeVisibility(llvm::GlobalValue *GV, const CXXRecordDecl *D, - TypeVisibilityKind TVK) const; - static llvm::GlobalValue::VisibilityTypes GetLLVMVisibility(Visibility V) { switch (V) { case DefaultVisibility: return llvm::GlobalValue::DefaultVisibility; @@ -614,50 +658,47 @@ public: return GetAddrOfGlobalVar(cast<VarDecl>(GD.getDecl())); } - /// CreateOrReplaceCXXRuntimeVariable - Will return a global variable of the - /// given type. 
If a variable with a different type already exists then a new - /// variable with the right type will be created and all uses of the old - /// variable will be replaced with a bitcast to the new variable. + /// Will return a global variable of the given type. If a variable with a + /// different type already exists then a new variable with the right type + /// will be created and all uses of the old variable will be replaced with a + /// bitcast to the new variable. llvm::GlobalVariable * CreateOrReplaceCXXRuntimeVariable(StringRef Name, llvm::Type *Ty, llvm::GlobalValue::LinkageTypes Linkage); - /// GetGlobalVarAddressSpace - Return the address space of the underlying - /// global variable for D, as determined by its declaration. Normally this - /// is the same as the address space of D's type, but in CUDA, address spaces - /// are associated with declarations, not types. + /// Return the address space of the underlying global variable for D, as + /// determined by its declaration. Normally this is the same as the address + /// space of D's type, but in CUDA, address spaces are associated with + /// declarations, not types. unsigned GetGlobalVarAddressSpace(const VarDecl *D, unsigned AddrSpace); - /// GetAddrOfGlobalVar - Return the llvm::Constant for the address of the - /// given global variable. If Ty is non-null and if the global doesn't exist, - /// then it will be greated with the specified type instead of whatever the - /// normal requested type would be. + /// Return the llvm::Constant for the address of the given global variable. + /// If Ty is non-null and if the global doesn't exist, then it will be greated + /// with the specified type instead of whatever the normal requested type + /// would be. llvm::Constant *GetAddrOfGlobalVar(const VarDecl *D, - llvm::Type *Ty = 0); - + llvm::Type *Ty = nullptr); - /// GetAddrOfFunction - Return the address of the given function. If Ty is - /// non-null, then this function will use the specified type if it has to - /// create it. - llvm::Constant *GetAddrOfFunction(GlobalDecl GD, - llvm::Type *Ty = 0, - bool ForVTable = false); + /// Return the address of the given function. If Ty is non-null, then this + /// function will use the specified type if it has to create it. + llvm::Constant *GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty = 0, + bool ForVTable = false, + bool DontDefer = false); - /// GetAddrOfRTTIDescriptor - Get the address of the RTTI descriptor - /// for the given type. + /// Get the address of the RTTI descriptor for the given type. llvm::Constant *GetAddrOfRTTIDescriptor(QualType Ty, bool ForEH = false); - /// GetAddrOfUuidDescriptor - Get the address of a uuid descriptor . + /// Get the address of a uuid descriptor . llvm::Constant *GetAddrOfUuidDescriptor(const CXXUuidofExpr* E); - /// GetAddrOfThunk - Get the address of the thunk for the given global decl. + /// Get the address of the thunk for the given global decl. llvm::Constant *GetAddrOfThunk(GlobalDecl GD, const ThunkInfo &Thunk); - /// GetWeakRefReference - Get a reference to the target of VD. + /// Get a reference to the target of VD. llvm::Constant *GetWeakRefReference(const ValueDecl *VD); - /// GetNonVirtualBaseClassOffset - Returns the offset from a derived class to - /// a class. Returns null if the offset is 0. + /// Returns the offset from a derived class to a class. Returns null if the + /// offset is 0. 
llvm::Constant * GetNonVirtualBaseClassOffset(const CXXRecordDecl *ClassDecl, CastExpr::path_const_iterator PathBegin, @@ -693,67 +734,49 @@ public: llvm::FoldingSet<ByrefHelpers> ByrefHelpersCache; - /// getUniqueBlockCount - Fetches the global unique block count. + /// Fetches the global unique block count. int getUniqueBlockCount() { return ++Block.GlobalUniqueCount; } - /// getBlockDescriptorType - Fetches the type of a generic block - /// descriptor. + /// Fetches the type of a generic block descriptor. llvm::Type *getBlockDescriptorType(); - /// getGenericBlockLiteralType - The type of a generic block literal. + /// The type of a generic block literal. llvm::Type *getGenericBlockLiteralType(); - /// GetAddrOfGlobalBlock - Gets the address of a block which - /// requires no captures. + /// Gets the address of a block which requires no captures. llvm::Constant *GetAddrOfGlobalBlock(const BlockExpr *BE, const char *); - /// GetAddrOfConstantCFString - Return a pointer to a constant CFString object - /// for the given string. + /// Return a pointer to a constant CFString object for the given string. llvm::Constant *GetAddrOfConstantCFString(const StringLiteral *Literal); - - /// GetAddrOfConstantString - Return a pointer to a constant NSString object - /// for the given string. Or a user defined String object as defined via + + /// Return a pointer to a constant NSString object for the given string. Or a + /// user defined String object as defined via /// -fconstant-string-class=class_name option. llvm::Constant *GetAddrOfConstantString(const StringLiteral *Literal); - /// GetConstantArrayFromStringLiteral - Return a constant array for the given - /// string. + /// Return a constant array for the given string. llvm::Constant *GetConstantArrayFromStringLiteral(const StringLiteral *E); - /// GetAddrOfConstantStringFromLiteral - Return a pointer to a constant array - /// for the given string literal. - llvm::Constant *GetAddrOfConstantStringFromLiteral(const StringLiteral *S); + /// Return a pointer to a constant array for the given string literal. + llvm::GlobalVariable * + GetAddrOfConstantStringFromLiteral(const StringLiteral *S); - /// GetAddrOfConstantStringFromObjCEncode - Return a pointer to a constant - /// array for the given ObjCEncodeExpr node. - llvm::Constant *GetAddrOfConstantStringFromObjCEncode(const ObjCEncodeExpr *); + /// Return a pointer to a constant array for the given ObjCEncodeExpr node. + llvm::GlobalVariable * + GetAddrOfConstantStringFromObjCEncode(const ObjCEncodeExpr *); - /// GetAddrOfConstantString - Returns a pointer to a character array - /// containing the literal. This contents are exactly that of the given - /// string, i.e. it will not be null terminated automatically; see - /// GetAddrOfConstantCString. Note that whether the result is actually a - /// pointer to an LLVM constant depends on Feature.WriteableStrings. - /// - /// The result has pointer to array type. - /// - /// \param GlobalName If provided, the name to use for the global - /// (if one is created). - llvm::Constant *GetAddrOfConstantString(StringRef Str, - const char *GlobalName=0, - unsigned Alignment=0); - - /// GetAddrOfConstantCString - Returns a pointer to a character array - /// containing the literal and a terminating '\0' character. The result has - /// pointer to array type. + /// Returns a pointer to a character array containing the literal and a + /// terminating '\0' character. The result has pointer to array type. 
/// /// \param GlobalName If provided, the name to use for the global (if one is /// created). - llvm::Constant *GetAddrOfConstantCString(const std::string &str, - const char *GlobalName=0, - unsigned Alignment=0); + llvm::GlobalVariable * + GetAddrOfConstantCString(const std::string &Str, + const char *GlobalName = nullptr, + unsigned Alignment = 0); - /// GetAddrOfConstantCompoundLiteral - Returns a pointer to a constant global - /// variable for the given file-scope compound literal expression. + /// Returns a pointer to a constant global variable for the given file-scope + /// compound literal expression. llvm::Constant *GetAddrOfConstantCompoundLiteral(const CompoundLiteralExpr*E); /// \brief Returns a pointer to a global variable representing a temporary @@ -765,31 +788,31 @@ public: /// Objective-C fast enumeration loop (for..in). QualType getObjCFastEnumerationStateType(); - /// GetAddrOfCXXConstructor - Return the address of the constructor of the - /// given type. - llvm::GlobalValue *GetAddrOfCXXConstructor(const CXXConstructorDecl *ctor, - CXXCtorType ctorType, - const CGFunctionInfo *fnInfo = 0); - - /// GetAddrOfCXXDestructor - Return the address of the constructor of the - /// given type. - llvm::GlobalValue *GetAddrOfCXXDestructor(const CXXDestructorDecl *dtor, - CXXDtorType dtorType, - const CGFunctionInfo *fnInfo = 0, - llvm::FunctionType *fnType = 0); - - /// getBuiltinLibFunction - Given a builtin id for a function like - /// "__builtin_fabsf", return a Function* for "fabsf". + /// Return the address of the constructor of the given type. + llvm::GlobalValue * + GetAddrOfCXXConstructor(const CXXConstructorDecl *ctor, CXXCtorType ctorType, + const CGFunctionInfo *fnInfo = nullptr, + bool DontDefer = false); + + /// Return the address of the constructor of the given type. + llvm::GlobalValue * + GetAddrOfCXXDestructor(const CXXDestructorDecl *dtor, + CXXDtorType dtorType, + const CGFunctionInfo *fnInfo = nullptr, + llvm::FunctionType *fnType = nullptr, + bool DontDefer = false); + + /// Given a builtin id for a function like "__builtin_fabsf", return a + /// Function* for "fabsf". llvm::Value *getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID); llvm::Function *getIntrinsic(unsigned IID, ArrayRef<llvm::Type*> Tys = None); - /// EmitTopLevelDecl - Emit code for a single top level declaration. + /// Emit code for a single top level declaration. void EmitTopLevelDecl(Decl *D); - /// HandleCXXStaticMemberVarInstantiation - Tell the consumer that this - // variable has been instantiated. + /// Tell the consumer that this variable has been instantiated. void HandleCXXStaticMemberVarInstantiation(VarDecl *VD); /// \brief If the declaration has internal linkage but is inside an @@ -798,25 +821,23 @@ public: template<typename SomeDecl> void MaybeHandleStaticInExternC(const SomeDecl *D, llvm::GlobalValue *GV); - /// AddUsedGlobal - Add a global which should be forced to be - /// present in the object file; these are emitted to the llvm.used - /// metadata global. - void AddUsedGlobal(llvm::GlobalValue *GV); + /// Add a global to a list to be added to the llvm.used metadata. + void addUsedGlobal(llvm::GlobalValue *GV); + + /// Add a global to a list to be added to the llvm.compiler.used metadata. + void addCompilerUsedGlobal(llvm::GlobalValue *GV); - /// AddCXXDtorEntry - Add a destructor and object to add to the C++ global - /// destructor function. + /// Add a destructor and object to add to the C++ global destructor function. 
void AddCXXDtorEntry(llvm::Constant *DtorFn, llvm::Constant *Object) { CXXGlobalDtors.push_back(std::make_pair(DtorFn, Object)); } - /// CreateRuntimeFunction - Create a new runtime function with the specified - /// type and name. + /// Create a new runtime function with the specified type and name. llvm::Constant *CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeSet ExtraAttrs = llvm::AttributeSet()); - /// CreateRuntimeVariable - Create a new runtime global variable with the - /// specified type and name. + /// Create a new runtime global variable with the specified type and name. llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty, StringRef Name); @@ -833,85 +854,80 @@ public: llvm::Constant *getLLVMLifetimeStartFn(); llvm::Constant *getLLVMLifetimeEndFn(); - // UpdateCompleteType - Make sure that this type is translated. + // Make sure that this type is translated. void UpdateCompletedType(const TagDecl *TD); llvm::Constant *getMemberPointerConstant(const UnaryOperator *e); - /// EmitConstantInit - Try to emit the initializer for the given declaration - /// as a constant; returns 0 if the expression cannot be emitted as a - /// constant. - llvm::Constant *EmitConstantInit(const VarDecl &D, CodeGenFunction *CGF = 0); + /// Try to emit the initializer for the given declaration as a constant; + /// returns 0 if the expression cannot be emitted as a constant. + llvm::Constant *EmitConstantInit(const VarDecl &D, + CodeGenFunction *CGF = nullptr); - /// EmitConstantExpr - Try to emit the given expression as a - /// constant; returns 0 if the expression cannot be emitted as a - /// constant. + /// Try to emit the given expression as a constant; returns 0 if the + /// expression cannot be emitted as a constant. llvm::Constant *EmitConstantExpr(const Expr *E, QualType DestType, - CodeGenFunction *CGF = 0); + CodeGenFunction *CGF = nullptr); - /// EmitConstantValue - Emit the given constant value as a constant, in the - /// type's scalar representation. + /// Emit the given constant value as a constant, in the type's scalar + /// representation. llvm::Constant *EmitConstantValue(const APValue &Value, QualType DestType, - CodeGenFunction *CGF = 0); + CodeGenFunction *CGF = nullptr); - /// EmitConstantValueForMemory - Emit the given constant value as a constant, - /// in the type's memory representation. + /// Emit the given constant value as a constant, in the type's memory + /// representation. llvm::Constant *EmitConstantValueForMemory(const APValue &Value, QualType DestType, - CodeGenFunction *CGF = 0); + CodeGenFunction *CGF = nullptr); - /// EmitNullConstant - Return the result of value-initializing the given - /// type, i.e. a null expression of the given type. This is usually, - /// but not always, an LLVM null constant. + /// Return the result of value-initializing the given type, i.e. a null + /// expression of the given type. This is usually, but not always, an LLVM + /// null constant. llvm::Constant *EmitNullConstant(QualType T); - /// EmitNullConstantForBase - Return a null constant appropriate for - /// zero-initializing a base class with the given type. This is usually, - /// but not always, an LLVM null constant. + /// Return a null constant appropriate for zero-initializing a base class with + /// the given type. This is usually, but not always, an LLVM null constant. llvm::Constant *EmitNullConstantForBase(const CXXRecordDecl *Record); - /// Error - Emit a general error that something can't be done. 
+ /// Emit a general error that something can't be done. void Error(SourceLocation loc, StringRef error); - /// ErrorUnsupported - Print out an error that codegen doesn't support the - /// specified stmt yet. + /// Print out an error that codegen doesn't support the specified stmt yet. void ErrorUnsupported(const Stmt *S, const char *Type); - /// ErrorUnsupported - Print out an error that codegen doesn't support the - /// specified decl yet. + /// Print out an error that codegen doesn't support the specified decl yet. void ErrorUnsupported(const Decl *D, const char *Type); - /// SetInternalFunctionAttributes - Set the attributes on the LLVM - /// function for the given decl and function info. This applies - /// attributes necessary for handling the ABI as well as user - /// specified attributes like section. + /// Set the attributes on the LLVM function for the given decl and function + /// info. This applies attributes necessary for handling the ABI as well as + /// user specified attributes like section. void SetInternalFunctionAttributes(const Decl *D, llvm::Function *F, const CGFunctionInfo &FI); - /// SetLLVMFunctionAttributes - Set the LLVM function attributes - /// (sext, zext, etc). + /// Set the LLVM function attributes (sext, zext, etc). void SetLLVMFunctionAttributes(const Decl *D, const CGFunctionInfo &Info, llvm::Function *F); - /// SetLLVMFunctionAttributesForDefinition - Set the LLVM function attributes - /// which only apply to a function definintion. + /// Set the LLVM function attributes which only apply to a function + /// definintion. void SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F); - /// ReturnTypeUsesSRet - Return true iff the given type uses 'sret' when used - /// as a return type. + /// Return true iff the given type uses 'sret' when used as a return type. bool ReturnTypeUsesSRet(const CGFunctionInfo &FI); - /// ReturnTypeUsesFPRet - Return true iff the given type uses 'fpret' when - /// used as a return type. + /// Return true iff the given type uses an argument slot when 'sret' is used + /// as a return type. + bool ReturnSlotInterferesWithArgs(const CGFunctionInfo &FI); + + /// Return true iff the given type uses 'fpret' when used as a return type. bool ReturnTypeUsesFPRet(QualType ResultType); - /// ReturnTypeUsesFP2Ret - Return true iff the given type uses 'fp2ret' when - /// used as a return type. + /// Return true iff the given type uses 'fp2ret' when used as a return type. bool ReturnTypeUsesFP2Ret(QualType ResultType); - /// ConstructAttributeList - Get the LLVM attributes and calling convention to - /// use for a particular function type. + /// Get the LLVM attributes and calling convention to use for a particular + /// function type. /// /// \param Info - The function type information. /// \param TargetDecl - The decl these attributes are being constructed @@ -926,15 +942,13 @@ public: bool AttrOnCallSite); StringRef getMangledName(GlobalDecl GD); - void getBlockMangledName(GlobalDecl GD, MangleBuffer &Buffer, - const BlockDecl *BD); + StringRef getBlockMangledName(GlobalDecl GD, const BlockDecl *BD); void EmitTentativeDefinition(const VarDecl *D); void EmitVTable(CXXRecordDecl *Class, bool DefinitionRequired); - /// EmitFundamentalRTTIDescriptors - Emit the RTTI descriptors for the - /// builtin types. + /// Emit the RTTI descriptors for the builtin types. void EmitFundamentalRTTIDescriptors(); /// \brief Appends Opts to the "Linker Options" metadata value. 
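Earlier in this header's diff, OrderGlobalInits::operator< is rewritten in terms of std::tie, which compares the (priority, lex_order) pair lexicographically instead of spelling out the two-field comparison by hand. A minimal standalone sketch of that idiom follows; the Item type and main() driver here are hypothetical illustrations, not part of this commit:

#include <algorithm>
#include <cassert>
#include <tuple>
#include <vector>

struct Item {
  unsigned priority;   // primary key
  unsigned lex_order;  // tie-breaker, preserves source order
  bool operator<(const Item &RHS) const {
    // std::tie builds tuples of references; tuple comparison is
    // lexicographic, so this orders by priority first, then lex_order.
    return std::tie(priority, lex_order) <
           std::tie(RHS.priority, RHS.lex_order);
  }
};

int main() {
  std::vector<Item> Items = {{200, 0}, {100, 1}, {100, 0}};
  std::sort(Items.begin(), Items.end());
  assert(Items[0].priority == 100 && Items[0].lex_order == 0);
  assert(Items[1].lex_order == 1);
  assert(Items[2].priority == 200);
  return 0;
}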
@@ -948,23 +962,26 @@ public: llvm::GlobalVariable::LinkageTypes getFunctionLinkage(GlobalDecl GD); - void setFunctionLinkage(GlobalDecl GD, llvm::GlobalValue *V) { - V->setLinkage(getFunctionLinkage(GD)); + void setFunctionLinkage(GlobalDecl GD, llvm::Function *F) { + F->setLinkage(getFunctionLinkage(GD)); } - /// getVTableLinkage - Return the appropriate linkage for the vtable, VTT, - /// and type information of the given class. + /// Return the appropriate linkage for the vtable, VTT, and type information + /// of the given class. llvm::GlobalVariable::LinkageTypes getVTableLinkage(const CXXRecordDecl *RD); - /// GetTargetTypeStoreSize - Return the store size, in character units, of - /// the given LLVM type. + /// Return the store size, in character units, of the given LLVM type. CharUnits GetTargetTypeStoreSize(llvm::Type *Ty) const; - /// GetLLVMLinkageVarDefinition - Returns LLVM linkage for a global - /// variable. - llvm::GlobalValue::LinkageTypes - GetLLVMLinkageVarDefinition(const VarDecl *D, bool isConstant); - + /// Returns LLVM linkage for a declarator. + llvm::GlobalValue::LinkageTypes + getLLVMLinkageForDeclarator(const DeclaratorDecl *D, GVALinkage Linkage, + bool IsConstantVariable); + + /// Returns LLVM linkage for a declarator. + llvm::GlobalValue::LinkageTypes + getLLVMLinkageVarDefinition(const VarDecl *VD, bool IsConstant); + /// Emit all the global annotations. void EmitGlobalAnnotations(); @@ -977,8 +994,8 @@ public: /// Emit the annotation line number. llvm::Constant *EmitAnnotationLineNo(SourceLocation L); - /// EmitAnnotateAttr - Generate the llvm::ConstantStruct which contains the - /// annotation information for a given GlobalValue. The annotation struct is + /// Generate the llvm::ConstantStruct which contains the annotation + /// information for a given GlobalValue. The annotation struct is /// {i8 *, i8 *, i8 *, i32}. The first field is a constant expression, the /// GlobalValue being annotated. The second field is the constant string /// created from the AnnotateAttr's annotation. The third field is a constant @@ -992,54 +1009,59 @@ public: /// annotations are emitted during finalization of the LLVM code. void AddGlobalAnnotations(const ValueDecl *D, llvm::GlobalValue *GV); - const llvm::SpecialCaseList &getSanitizerBlacklist() const { - return *SanitizerBlacklist; + const SanitizerBlacklist &getSanitizerBlacklist() const { + return SanitizerBL; } - const SanitizerOptions &getSanOpts() const { return SanOpts; } + void reportGlobalToASan(llvm::GlobalVariable *GV, const VarDecl &D, + bool IsDynInit = false); + void reportGlobalToASan(llvm::GlobalVariable *GV, SourceLocation Loc, + StringRef Name, bool IsDynInit = false, + bool IsBlacklisted = false); + + /// Disable sanitizer instrumentation for this global. + void disableSanitizerForGlobal(llvm::GlobalVariable *GV); void addDeferredVTable(const CXXRecordDecl *RD) { DeferredVTables.push_back(RD); } - /// EmitGlobal - Emit code for a singal global function or var decl. Forward - /// declarations are emitted lazily. + /// Emit code for a singal global function or var decl. Forward declarations + /// are emitted lazily. 
void EmitGlobal(GlobalDecl D); private: llvm::GlobalValue *GetGlobalValue(StringRef Ref); - llvm::Constant *GetOrCreateLLVMFunction(StringRef MangledName, - llvm::Type *Ty, - GlobalDecl D, - bool ForVTable, - llvm::AttributeSet ExtraAttrs = - llvm::AttributeSet()); + llvm::Constant * + GetOrCreateLLVMFunction(StringRef MangledName, llvm::Type *Ty, GlobalDecl D, + bool ForVTable, bool DontDefer = false, + llvm::AttributeSet ExtraAttrs = llvm::AttributeSet()); + llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName, llvm::PointerType *PTy, - const VarDecl *D, - bool UnnamedAddr = false); + const VarDecl *D); - /// SetCommonAttributes - Set attributes which are common to any - /// form of a global definition (alias, Objective-C method, - /// function, global variable). + /// Set attributes which are common to any form of a global definition (alias, + /// Objective-C method, function, global variable). /// /// NOTE: This should only be called for definitions. void SetCommonAttributes(const Decl *D, llvm::GlobalValue *GV); - /// SetFunctionDefinitionAttributes - Set attributes for a global definition. - void SetFunctionDefinitionAttributes(const FunctionDecl *D, - llvm::GlobalValue *GV); + void setNonAliasAttributes(const Decl *D, llvm::GlobalObject *GO); + + /// Set attributes for a global definition. + void setFunctionDefinitionAttributes(const FunctionDecl *D, + llvm::Function *F); - /// SetFunctionAttributes - Set function attributes for a function - /// declaration. + /// Set function attributes for a function declaration. void SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, bool IsIncompleteFunction); - void EmitGlobalDefinition(GlobalDecl D); + void EmitGlobalDefinition(GlobalDecl D, llvm::GlobalValue *GV = nullptr); - void EmitGlobalFunctionDefinition(GlobalDecl GD); + void EmitGlobalFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV); void EmitGlobalVarDefinition(const VarDecl *D); void EmitAliasDefinition(GlobalDecl GD); void EmitObjCPropertyImplementations(const ObjCImplementationDecl *D); @@ -1055,44 +1077,44 @@ private: void EmitLinkageSpec(const LinkageSpecDecl *D); void CompleteDIClassType(const CXXMethodDecl* D); - /// EmitCXXConstructor - Emit a single constructor with the given type from - /// a C++ constructor Decl. + /// Emit a single constructor with the given type from a C++ constructor Decl. void EmitCXXConstructor(const CXXConstructorDecl *D, CXXCtorType Type); - /// EmitCXXDestructor - Emit a single destructor with the given type from - /// a C++ destructor Decl. + /// Emit a single destructor with the given type from a C++ destructor Decl. void EmitCXXDestructor(const CXXDestructorDecl *D, CXXDtorType Type); /// \brief Emit the function that initializes C++ thread_local variables. void EmitCXXThreadLocalInitFunc(); - /// EmitCXXGlobalInitFunc - Emit the function that initializes C++ globals. + /// Emit the function that initializes C++ globals. void EmitCXXGlobalInitFunc(); - /// EmitCXXGlobalDtorFunc - Emit the function that destroys C++ globals. + /// Emit the function that destroys C++ globals. void EmitCXXGlobalDtorFunc(); - /// EmitCXXGlobalVarDeclInitFunc - Emit the function that initializes the - /// specified global (if PerformInit is true) and registers its destructor. + /// Emit the function that initializes the specified global (if PerformInit is + /// true) and registers its destructor. 
void EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, llvm::GlobalVariable *Addr, bool PerformInit); + void EmitPointerToInitFunc(const VarDecl *VD, llvm::GlobalVariable *Addr, + llvm::Function *InitFunc, InitSegAttr *ISA); + // FIXME: Hardcoding priority here is gross. - void AddGlobalCtor(llvm::Function *Ctor, int Priority=65535); - void AddGlobalDtor(llvm::Function *Dtor, int Priority=65535); + void AddGlobalCtor(llvm::Function *Ctor, int Priority = 65535, + llvm::Constant *AssociatedData = 0); + void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535); - /// EmitCtorList - Generates a global array of functions and priorities using - /// the given list and name. This array will have appending linkage and is - /// suitable for use as a LLVM constructor or destructor array. + /// Generates a global array of functions and priorities using the given list + /// and name. This array will have appending linkage and is suitable for use + /// as a LLVM constructor or destructor array. void EmitCtorList(const CtorList &Fns, const char *GlobalName); - /// EmitFundamentalRTTIDescriptor - Emit the RTTI descriptors for the - /// given type. + /// Emit the RTTI descriptors for the given type. void EmitFundamentalRTTIDescriptor(QualType Type); - /// EmitDeferred - Emit any needed decls for which code generation - /// was deferred. + /// Emit any needed decls for which code generation was deferred. void EmitDeferred(); /// Call replaceAllUsesWith on all pairs in Replacements. @@ -1100,13 +1122,11 @@ private: void checkAliases(); - /// EmitDeferredVTables - Emit any vtables which we deferred and - /// still have a use for. + /// Emit any vtables which we deferred and still have a use for. void EmitDeferredVTables(); - /// EmitLLVMUsed - Emit the llvm.used metadata used to force - /// references to global which may otherwise be optimized out. - void EmitLLVMUsed(); + /// Emit the llvm.used and llvm.compiler.used metadata. + void emitLLVMUsed(); /// \brief Emit the link options introduced by imported modules. void EmitModuleLinkOptions(); @@ -1120,20 +1140,22 @@ private: /// \brief Emit the Clang version as llvm.ident metadata. void EmitVersionIdentMetadata(); - /// EmitCoverageFile - Emit the llvm.gcov metadata used to tell LLVM where - /// to emit the .gcno and .gcda files in a way that persists in .bc files. + /// Emits target specific Metadata for global declarations. + void EmitTargetMetadata(); + + /// Emit the llvm.gcov metadata used to tell LLVM where to emit the .gcno and + /// .gcda files in a way that persists in .bc files. void EmitCoverageFile(); /// Emits the initializer for a uuidof string. llvm::Constant *EmitUuidofInitializer(StringRef uuidstr, QualType IIDType); - /// MayDeferGeneration - Determine if the given decl can be emitted - /// lazily; this is only relevant for definitions. The given decl - /// must be either a function or var decl. + /// Determine if the given decl can be emitted lazily; this is only relevant + /// for definitions. The given decl must be either a function or var decl. bool MayDeferGeneration(const ValueDecl *D); - /// SimplifyPersonality - Check whether we can use a "simpler", more - /// core exceptions personality function. + /// Check whether we can use a "simpler", more core exceptions personality + /// function. 
void SimplifyPersonality(); }; } // end namespace CodeGen diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp new file mode 100644 index 000000000000..b233e3c7d75e --- /dev/null +++ b/lib/CodeGen/CodeGenPGO.cpp @@ -0,0 +1,1011 @@ +//===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Instrumentation-based profile-guided optimization +// +//===----------------------------------------------------------------------===// + +#include "CodeGenPGO.h" +#include "CodeGenFunction.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/StmtVisitor.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/ProfileData/InstrProfReader.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MD5.h" + +using namespace clang; +using namespace CodeGen; + +void CodeGenPGO::setFuncName(llvm::Function *Fn) { + RawFuncName = Fn->getName(); + + // Function names may be prefixed with a binary '1' to indicate + // that the backend should not modify the symbols due to any platform + // naming convention. Do not include that '1' in the PGO profile name. + if (RawFuncName[0] == '\1') + RawFuncName = RawFuncName.substr(1); + + if (!Fn->hasLocalLinkage()) { + PrefixedFuncName.reset(new std::string(RawFuncName)); + return; + } + + // For local symbols, prepend the main file name to distinguish them. + // Do not include the full path in the file name since there's no guarantee + // that it will stay the same, e.g., if the files are checked out from + // version control in different locations. + PrefixedFuncName.reset(new std::string(CGM.getCodeGenOpts().MainFileName)); + if (PrefixedFuncName->empty()) + PrefixedFuncName->assign("<unknown>"); + PrefixedFuncName->append(":"); + PrefixedFuncName->append(RawFuncName); +} + +static llvm::Function *getRegisterFunc(CodeGenModule &CGM) { + return CGM.getModule().getFunction("__llvm_profile_register_functions"); +} + +static llvm::BasicBlock *getOrInsertRegisterBB(CodeGenModule &CGM) { + // Don't do this for Darwin. compiler-rt uses linker magic. + if (CGM.getTarget().getTriple().isOSDarwin()) + return nullptr; + + // Only need to insert this once per module. + if (llvm::Function *RegisterF = getRegisterFunc(CGM)) + return &RegisterF->getEntryBlock(); + + // Construct the function. + auto *VoidTy = llvm::Type::getVoidTy(CGM.getLLVMContext()); + auto *RegisterFTy = llvm::FunctionType::get(VoidTy, false); + auto *RegisterF = llvm::Function::Create(RegisterFTy, + llvm::GlobalValue::InternalLinkage, + "__llvm_profile_register_functions", + &CGM.getModule()); + RegisterF->setUnnamedAddr(true); + if (CGM.getCodeGenOpts().DisableRedZone) + RegisterF->addFnAttr(llvm::Attribute::NoRedZone); + + // Construct and return the entry block. 
+ auto *BB = llvm::BasicBlock::Create(CGM.getLLVMContext(), "", RegisterF); + CGBuilderTy Builder(BB); + Builder.CreateRetVoid(); + return BB; +} + +static llvm::Constant *getOrInsertRuntimeRegister(CodeGenModule &CGM) { + auto *VoidTy = llvm::Type::getVoidTy(CGM.getLLVMContext()); + auto *VoidPtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext()); + auto *RuntimeRegisterTy = llvm::FunctionType::get(VoidTy, VoidPtrTy, false); + return CGM.getModule().getOrInsertFunction("__llvm_profile_register_function", + RuntimeRegisterTy); +} + +static bool isMachO(const CodeGenModule &CGM) { + return CGM.getTarget().getTriple().isOSBinFormatMachO(); +} + +static StringRef getCountersSection(const CodeGenModule &CGM) { + return isMachO(CGM) ? "__DATA,__llvm_prf_cnts" : "__llvm_prf_cnts"; +} + +static StringRef getNameSection(const CodeGenModule &CGM) { + return isMachO(CGM) ? "__DATA,__llvm_prf_names" : "__llvm_prf_names"; +} + +static StringRef getDataSection(const CodeGenModule &CGM) { + return isMachO(CGM) ? "__DATA,__llvm_prf_data" : "__llvm_prf_data"; +} + +llvm::GlobalVariable *CodeGenPGO::buildDataVar() { + // Create name variable. + llvm::LLVMContext &Ctx = CGM.getLLVMContext(); + auto *VarName = llvm::ConstantDataArray::getString(Ctx, getFuncName(), + false); + auto *Name = new llvm::GlobalVariable(CGM.getModule(), VarName->getType(), + true, VarLinkage, VarName, + getFuncVarName("name")); + Name->setSection(getNameSection(CGM)); + Name->setAlignment(1); + + // Create data variable. + auto *Int32Ty = llvm::Type::getInt32Ty(Ctx); + auto *Int64Ty = llvm::Type::getInt64Ty(Ctx); + auto *Int8PtrTy = llvm::Type::getInt8PtrTy(Ctx); + auto *Int64PtrTy = llvm::Type::getInt64PtrTy(Ctx); + llvm::Type *DataTypes[] = { + Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int64PtrTy + }; + auto *DataTy = llvm::StructType::get(Ctx, makeArrayRef(DataTypes)); + llvm::Constant *DataVals[] = { + llvm::ConstantInt::get(Int32Ty, getFuncName().size()), + llvm::ConstantInt::get(Int32Ty, NumRegionCounters), + llvm::ConstantInt::get(Int64Ty, FunctionHash), + llvm::ConstantExpr::getBitCast(Name, Int8PtrTy), + llvm::ConstantExpr::getBitCast(RegionCounters, Int64PtrTy) + }; + auto *Data = + new llvm::GlobalVariable(CGM.getModule(), DataTy, true, VarLinkage, + llvm::ConstantStruct::get(DataTy, DataVals), + getFuncVarName("data")); + + // All the data should be packed into an array in its own section. + Data->setSection(getDataSection(CGM)); + Data->setAlignment(8); + + // Hide all these symbols so that we correctly get a copy for each + // executable. The profile format expects names and counters to be + // contiguous, so references into shared objects would be invalid. + if (!llvm::GlobalValue::isLocalLinkage(VarLinkage)) { + Name->setVisibility(llvm::GlobalValue::HiddenVisibility); + Data->setVisibility(llvm::GlobalValue::HiddenVisibility); + RegionCounters->setVisibility(llvm::GlobalValue::HiddenVisibility); + } + + // Make sure the data doesn't get deleted. + CGM.addUsedGlobal(Data); + return Data; +} + +void CodeGenPGO::emitInstrumentationData() { + if (!RegionCounters) + return; + + // Build the data. + auto *Data = buildDataVar(); + + // Register the data. 
+ auto *RegisterBB = getOrInsertRegisterBB(CGM); + if (!RegisterBB) + return; + CGBuilderTy Builder(RegisterBB->getTerminator()); + auto *VoidPtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext()); + Builder.CreateCall(getOrInsertRuntimeRegister(CGM), + Builder.CreateBitCast(Data, VoidPtrTy)); +} + +llvm::Function *CodeGenPGO::emitInitialization(CodeGenModule &CGM) { + if (!CGM.getCodeGenOpts().ProfileInstrGenerate) + return nullptr; + + assert(CGM.getModule().getFunction("__llvm_profile_init") == nullptr && + "profile initialization already emitted"); + + // Get the function to call at initialization. + llvm::Constant *RegisterF = getRegisterFunc(CGM); + if (!RegisterF) + return nullptr; + + // Create the initialization function. + auto *VoidTy = llvm::Type::getVoidTy(CGM.getLLVMContext()); + auto *F = llvm::Function::Create(llvm::FunctionType::get(VoidTy, false), + llvm::GlobalValue::InternalLinkage, + "__llvm_profile_init", &CGM.getModule()); + F->setUnnamedAddr(true); + F->addFnAttr(llvm::Attribute::NoInline); + if (CGM.getCodeGenOpts().DisableRedZone) + F->addFnAttr(llvm::Attribute::NoRedZone); + + // Add the basic block and the necessary calls. + CGBuilderTy Builder(llvm::BasicBlock::Create(CGM.getLLVMContext(), "", F)); + Builder.CreateCall(RegisterF); + Builder.CreateRetVoid(); + + return F; +} + +namespace { +/// \brief Stable hasher for PGO region counters. +/// +/// PGOHash produces a stable hash of a given function's control flow. +/// +/// Changing the output of this hash will invalidate all previously generated +/// profiles -- i.e., don't do it. +/// +/// \note When this hash does eventually change (years?), we still need to +/// support old hashes. We'll need to pull in the version number from the +/// profile data format and use the matching hash function. +class PGOHash { + uint64_t Working; + unsigned Count; + llvm::MD5 MD5; + + static const int NumBitsPerType = 6; + static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType; + static const unsigned TooBig = 1u << NumBitsPerType; + +public: + /// \brief Hash values for AST nodes. + /// + /// Distinct values for AST nodes that have region counters attached. + /// + /// These values must be stable. All new members must be added at the end, + /// and no members should be removed. Changing the enumeration value for an + /// AST node will affect the hash of every function that contains that node. + enum HashType : unsigned char { + None = 0, + LabelStmt = 1, + WhileStmt, + DoStmt, + ForStmt, + CXXForRangeStmt, + ObjCForCollectionStmt, + SwitchStmt, + CaseStmt, + DefaultStmt, + IfStmt, + CXXTryStmt, + CXXCatchStmt, + ConditionalOperator, + BinaryOperatorLAnd, + BinaryOperatorLOr, + BinaryConditionalOperator, + + // Keep this last. It's for the static assert that follows. + LastHashType + }; + static_assert(LastHashType <= TooBig, "Too many types in HashType"); + + // TODO: When this format changes, take in a version number here, and use the + // old hash calculation for file formats that used the old hash. + PGOHash() : Working(0), Count(0) {} + void combine(HashType Type); + uint64_t finalize(); +}; +const int PGOHash::NumBitsPerType; +const unsigned PGOHash::NumTypesPerWord; +const unsigned PGOHash::TooBig; + + /// A RecursiveASTVisitor that fills a map of statements to PGO counters. + struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> { + /// The next counter value to assign. + unsigned NextCounter; + /// The function hash. + PGOHash Hash; + /// The map of statements to counters. 
+ llvm::DenseMap<const Stmt *, unsigned> &CounterMap; + + MapRegionCounters(llvm::DenseMap<const Stmt *, unsigned> &CounterMap) + : NextCounter(0), CounterMap(CounterMap) {} + + // Blocks and lambdas are handled as separate functions, so we need not + // traverse them in the parent context. + bool TraverseBlockExpr(BlockExpr *BE) { return true; } + bool TraverseLambdaBody(LambdaExpr *LE) { return true; } + bool TraverseCapturedStmt(CapturedStmt *CS) { return true; } + + bool VisitDecl(const Decl *D) { + switch (D->getKind()) { + default: + break; + case Decl::Function: + case Decl::CXXMethod: + case Decl::CXXConstructor: + case Decl::CXXDestructor: + case Decl::CXXConversion: + case Decl::ObjCMethod: + case Decl::Block: + case Decl::Captured: + CounterMap[D->getBody()] = NextCounter++; + break; + } + return true; + } + + bool VisitStmt(const Stmt *S) { + auto Type = getHashType(S); + if (Type == PGOHash::None) + return true; + + CounterMap[S] = NextCounter++; + Hash.combine(Type); + return true; + } + PGOHash::HashType getHashType(const Stmt *S) { + switch (S->getStmtClass()) { + default: + break; + case Stmt::LabelStmtClass: + return PGOHash::LabelStmt; + case Stmt::WhileStmtClass: + return PGOHash::WhileStmt; + case Stmt::DoStmtClass: + return PGOHash::DoStmt; + case Stmt::ForStmtClass: + return PGOHash::ForStmt; + case Stmt::CXXForRangeStmtClass: + return PGOHash::CXXForRangeStmt; + case Stmt::ObjCForCollectionStmtClass: + return PGOHash::ObjCForCollectionStmt; + case Stmt::SwitchStmtClass: + return PGOHash::SwitchStmt; + case Stmt::CaseStmtClass: + return PGOHash::CaseStmt; + case Stmt::DefaultStmtClass: + return PGOHash::DefaultStmt; + case Stmt::IfStmtClass: + return PGOHash::IfStmt; + case Stmt::CXXTryStmtClass: + return PGOHash::CXXTryStmt; + case Stmt::CXXCatchStmtClass: + return PGOHash::CXXCatchStmt; + case Stmt::ConditionalOperatorClass: + return PGOHash::ConditionalOperator; + case Stmt::BinaryConditionalOperatorClass: + return PGOHash::BinaryConditionalOperator; + case Stmt::BinaryOperatorClass: { + const BinaryOperator *BO = cast<BinaryOperator>(S); + if (BO->getOpcode() == BO_LAnd) + return PGOHash::BinaryOperatorLAnd; + if (BO->getOpcode() == BO_LOr) + return PGOHash::BinaryOperatorLOr; + break; + } + } + return PGOHash::None; + } + }; + + /// A StmtVisitor that propagates the raw counts through the AST and + /// records the count at statements where the value may change. + struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> { + /// PGO state. + CodeGenPGO &PGO; + + /// A flag that is set when the current count should be recorded on the + /// next statement, such as at the exit of a loop. + bool RecordNextStmtCount; + + /// The map of statements to count values. + llvm::DenseMap<const Stmt *, uint64_t> &CountMap; + + /// BreakContinueStack - Keep counts of breaks and continues inside loops. 
+ struct BreakContinue { + uint64_t BreakCount; + uint64_t ContinueCount; + BreakContinue() : BreakCount(0), ContinueCount(0) {} + }; + SmallVector<BreakContinue, 8> BreakContinueStack; + + ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap, + CodeGenPGO &PGO) + : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {} + + void RecordStmtCount(const Stmt *S) { + if (RecordNextStmtCount) { + CountMap[S] = PGO.getCurrentRegionCount(); + RecordNextStmtCount = false; + } + } + + void VisitStmt(const Stmt *S) { + RecordStmtCount(S); + for (Stmt::const_child_range I = S->children(); I; ++I) { + if (*I) + this->Visit(*I); + } + } + + void VisitFunctionDecl(const FunctionDecl *D) { + // Counter tracks entry to the function body. + RegionCounter Cnt(PGO, D->getBody()); + Cnt.beginRegion(); + CountMap[D->getBody()] = PGO.getCurrentRegionCount(); + Visit(D->getBody()); + } + + // Skip lambda expressions. We visit these as FunctionDecls when we're + // generating them and aren't interested in the body when generating a + // parent context. + void VisitLambdaExpr(const LambdaExpr *LE) {} + + void VisitCapturedDecl(const CapturedDecl *D) { + // Counter tracks entry to the capture body. + RegionCounter Cnt(PGO, D->getBody()); + Cnt.beginRegion(); + CountMap[D->getBody()] = PGO.getCurrentRegionCount(); + Visit(D->getBody()); + } + + void VisitObjCMethodDecl(const ObjCMethodDecl *D) { + // Counter tracks entry to the method body. + RegionCounter Cnt(PGO, D->getBody()); + Cnt.beginRegion(); + CountMap[D->getBody()] = PGO.getCurrentRegionCount(); + Visit(D->getBody()); + } + + void VisitBlockDecl(const BlockDecl *D) { + // Counter tracks entry to the block body. + RegionCounter Cnt(PGO, D->getBody()); + Cnt.beginRegion(); + CountMap[D->getBody()] = PGO.getCurrentRegionCount(); + Visit(D->getBody()); + } + + void VisitReturnStmt(const ReturnStmt *S) { + RecordStmtCount(S); + if (S->getRetValue()) + Visit(S->getRetValue()); + PGO.setCurrentRegionUnreachable(); + RecordNextStmtCount = true; + } + + void VisitGotoStmt(const GotoStmt *S) { + RecordStmtCount(S); + PGO.setCurrentRegionUnreachable(); + RecordNextStmtCount = true; + } + + void VisitLabelStmt(const LabelStmt *S) { + RecordNextStmtCount = false; + // Counter tracks the block following the label. + RegionCounter Cnt(PGO, S); + Cnt.beginRegion(); + CountMap[S] = PGO.getCurrentRegionCount(); + Visit(S->getSubStmt()); + } + + void VisitBreakStmt(const BreakStmt *S) { + RecordStmtCount(S); + assert(!BreakContinueStack.empty() && "break not in a loop or switch!"); + BreakContinueStack.back().BreakCount += PGO.getCurrentRegionCount(); + PGO.setCurrentRegionUnreachable(); + RecordNextStmtCount = true; + } + + void VisitContinueStmt(const ContinueStmt *S) { + RecordStmtCount(S); + assert(!BreakContinueStack.empty() && "continue stmt not in a loop!"); + BreakContinueStack.back().ContinueCount += PGO.getCurrentRegionCount(); + PGO.setCurrentRegionUnreachable(); + RecordNextStmtCount = true; + } + + void VisitWhileStmt(const WhileStmt *S) { + RecordStmtCount(S); + // Counter tracks the body of the loop. + RegionCounter Cnt(PGO, S); + BreakContinueStack.push_back(BreakContinue()); + // Visit the body region first so the break/continue adjustments can be + // included when visiting the condition. + Cnt.beginRegion(); + CountMap[S->getBody()] = PGO.getCurrentRegionCount(); + Visit(S->getBody()); + Cnt.adjustForControlFlow(); + + // ...then go back and propagate counts through the condition. 
The count + // at the start of the condition is the sum of the incoming edges, + // the backedge from the end of the loop body, and the edges from + // continue statements. + BreakContinue BC = BreakContinueStack.pop_back_val(); + Cnt.setCurrentRegionCount(Cnt.getParentCount() + + Cnt.getAdjustedCount() + BC.ContinueCount); + CountMap[S->getCond()] = PGO.getCurrentRegionCount(); + Visit(S->getCond()); + Cnt.adjustForControlFlow(); + Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount); + RecordNextStmtCount = true; + } + + void VisitDoStmt(const DoStmt *S) { + RecordStmtCount(S); + // Counter tracks the body of the loop. + RegionCounter Cnt(PGO, S); + BreakContinueStack.push_back(BreakContinue()); + Cnt.beginRegion(/*AddIncomingFallThrough=*/true); + CountMap[S->getBody()] = PGO.getCurrentRegionCount(); + Visit(S->getBody()); + Cnt.adjustForControlFlow(); + + BreakContinue BC = BreakContinueStack.pop_back_val(); + // The count at the start of the condition is equal to the count at the + // end of the body. The adjusted count does not include either the + // fall-through count coming into the loop or the continue count, so add + // both of those separately. This is coincidentally the same equation as + // with while loops but for different reasons. + Cnt.setCurrentRegionCount(Cnt.getParentCount() + + Cnt.getAdjustedCount() + BC.ContinueCount); + CountMap[S->getCond()] = PGO.getCurrentRegionCount(); + Visit(S->getCond()); + Cnt.adjustForControlFlow(); + Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount); + RecordNextStmtCount = true; + } + + void VisitForStmt(const ForStmt *S) { + RecordStmtCount(S); + if (S->getInit()) + Visit(S->getInit()); + // Counter tracks the body of the loop. + RegionCounter Cnt(PGO, S); + BreakContinueStack.push_back(BreakContinue()); + // Visit the body region first. (This is basically the same as a while + // loop; see further comments in VisitWhileStmt.) + Cnt.beginRegion(); + CountMap[S->getBody()] = PGO.getCurrentRegionCount(); + Visit(S->getBody()); + Cnt.adjustForControlFlow(); + + // The increment is essentially part of the body but it needs to include + // the count for all the continue statements. + if (S->getInc()) { + Cnt.setCurrentRegionCount(PGO.getCurrentRegionCount() + + BreakContinueStack.back().ContinueCount); + CountMap[S->getInc()] = PGO.getCurrentRegionCount(); + Visit(S->getInc()); + Cnt.adjustForControlFlow(); + } + + BreakContinue BC = BreakContinueStack.pop_back_val(); + + // ...then go back and propagate counts through the condition. + if (S->getCond()) { + Cnt.setCurrentRegionCount(Cnt.getParentCount() + + Cnt.getAdjustedCount() + + BC.ContinueCount); + CountMap[S->getCond()] = PGO.getCurrentRegionCount(); + Visit(S->getCond()); + Cnt.adjustForControlFlow(); + } + Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount); + RecordNextStmtCount = true; + } + + void VisitCXXForRangeStmt(const CXXForRangeStmt *S) { + RecordStmtCount(S); + Visit(S->getRangeStmt()); + Visit(S->getBeginEndStmt()); + // Counter tracks the body of the loop. + RegionCounter Cnt(PGO, S); + BreakContinueStack.push_back(BreakContinue()); + // Visit the body region first. (This is basically the same as a while + // loop; see further comments in VisitWhileStmt.) 
+ Cnt.beginRegion(); + CountMap[S->getLoopVarStmt()] = PGO.getCurrentRegionCount(); + Visit(S->getLoopVarStmt()); + Visit(S->getBody()); + Cnt.adjustForControlFlow(); + + // The increment is essentially part of the body but it needs to include + // the count for all the continue statements. + Cnt.setCurrentRegionCount(PGO.getCurrentRegionCount() + + BreakContinueStack.back().ContinueCount); + CountMap[S->getInc()] = PGO.getCurrentRegionCount(); + Visit(S->getInc()); + Cnt.adjustForControlFlow(); + + BreakContinue BC = BreakContinueStack.pop_back_val(); + + // ...then go back and propagate counts through the condition. + Cnt.setCurrentRegionCount(Cnt.getParentCount() + + Cnt.getAdjustedCount() + + BC.ContinueCount); + CountMap[S->getCond()] = PGO.getCurrentRegionCount(); + Visit(S->getCond()); + Cnt.adjustForControlFlow(); + Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount); + RecordNextStmtCount = true; + } + + void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) { + RecordStmtCount(S); + Visit(S->getElement()); + // Counter tracks the body of the loop. + RegionCounter Cnt(PGO, S); + BreakContinueStack.push_back(BreakContinue()); + Cnt.beginRegion(); + CountMap[S->getBody()] = PGO.getCurrentRegionCount(); + Visit(S->getBody()); + BreakContinue BC = BreakContinueStack.pop_back_val(); + Cnt.adjustForControlFlow(); + Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount); + RecordNextStmtCount = true; + } + + void VisitSwitchStmt(const SwitchStmt *S) { + RecordStmtCount(S); + Visit(S->getCond()); + PGO.setCurrentRegionUnreachable(); + BreakContinueStack.push_back(BreakContinue()); + Visit(S->getBody()); + // If the switch is inside a loop, add the continue counts. + BreakContinue BC = BreakContinueStack.pop_back_val(); + if (!BreakContinueStack.empty()) + BreakContinueStack.back().ContinueCount += BC.ContinueCount; + // Counter tracks the exit block of the switch. + RegionCounter ExitCnt(PGO, S); + ExitCnt.beginRegion(); + RecordNextStmtCount = true; + } + + void VisitCaseStmt(const CaseStmt *S) { + RecordNextStmtCount = false; + // Counter for this particular case. This counts only jumps from the + // switch header and does not include fallthrough from the case before + // this one. + RegionCounter Cnt(PGO, S); + Cnt.beginRegion(/*AddIncomingFallThrough=*/true); + CountMap[S] = Cnt.getCount(); + RecordNextStmtCount = true; + Visit(S->getSubStmt()); + } + + void VisitDefaultStmt(const DefaultStmt *S) { + RecordNextStmtCount = false; + // Counter for this default case. This does not include fallthrough from + // the previous case. + RegionCounter Cnt(PGO, S); + Cnt.beginRegion(/*AddIncomingFallThrough=*/true); + CountMap[S] = Cnt.getCount(); + RecordNextStmtCount = true; + Visit(S->getSubStmt()); + } + + void VisitIfStmt(const IfStmt *S) { + RecordStmtCount(S); + // Counter tracks the "then" part of an if statement. The count for + // the "else" part, if it exists, will be calculated from this counter. 
+ RegionCounter Cnt(PGO, S); + Visit(S->getCond()); + + Cnt.beginRegion(); + CountMap[S->getThen()] = PGO.getCurrentRegionCount(); + Visit(S->getThen()); + Cnt.adjustForControlFlow(); + + if (S->getElse()) { + Cnt.beginElseRegion(); + CountMap[S->getElse()] = PGO.getCurrentRegionCount(); + Visit(S->getElse()); + Cnt.adjustForControlFlow(); + } + Cnt.applyAdjustmentsToRegion(0); + RecordNextStmtCount = true; + } + + void VisitCXXTryStmt(const CXXTryStmt *S) { + RecordStmtCount(S); + Visit(S->getTryBlock()); + for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I) + Visit(S->getHandler(I)); + // Counter tracks the continuation block of the try statement. + RegionCounter Cnt(PGO, S); + Cnt.beginRegion(); + RecordNextStmtCount = true; + } + + void VisitCXXCatchStmt(const CXXCatchStmt *S) { + RecordNextStmtCount = false; + // Counter tracks the catch statement's handler block. + RegionCounter Cnt(PGO, S); + Cnt.beginRegion(); + CountMap[S] = PGO.getCurrentRegionCount(); + Visit(S->getHandlerBlock()); + } + + void VisitAbstractConditionalOperator( + const AbstractConditionalOperator *E) { + RecordStmtCount(E); + // Counter tracks the "true" part of a conditional operator. The + // count in the "false" part will be calculated from this counter. + RegionCounter Cnt(PGO, E); + Visit(E->getCond()); + + Cnt.beginRegion(); + CountMap[E->getTrueExpr()] = PGO.getCurrentRegionCount(); + Visit(E->getTrueExpr()); + Cnt.adjustForControlFlow(); + + Cnt.beginElseRegion(); + CountMap[E->getFalseExpr()] = PGO.getCurrentRegionCount(); + Visit(E->getFalseExpr()); + Cnt.adjustForControlFlow(); + + Cnt.applyAdjustmentsToRegion(0); + RecordNextStmtCount = true; + } + + void VisitBinLAnd(const BinaryOperator *E) { + RecordStmtCount(E); + // Counter tracks the right hand side of a logical and operator. + RegionCounter Cnt(PGO, E); + Visit(E->getLHS()); + Cnt.beginRegion(); + CountMap[E->getRHS()] = PGO.getCurrentRegionCount(); + Visit(E->getRHS()); + Cnt.adjustForControlFlow(); + Cnt.applyAdjustmentsToRegion(0); + RecordNextStmtCount = true; + } + + void VisitBinLOr(const BinaryOperator *E) { + RecordStmtCount(E); + // Counter tracks the right hand side of a logical or operator. + RegionCounter Cnt(PGO, E); + Visit(E->getLHS()); + Cnt.beginRegion(); + CountMap[E->getRHS()] = PGO.getCurrentRegionCount(); + Visit(E->getRHS()); + Cnt.adjustForControlFlow(); + Cnt.applyAdjustmentsToRegion(0); + RecordNextStmtCount = true; + } + }; +} + +void PGOHash::combine(HashType Type) { + // Check that we never combine 0 and only have six bits. + assert(Type && "Hash is invalid: unexpected type 0"); + assert(unsigned(Type) < TooBig && "Hash is invalid: too many types"); + + // Pass through MD5 if enough work has built up. + if (Count && Count % NumTypesPerWord == 0) { + using namespace llvm::support; + uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working); + MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped))); + Working = 0; + } + + // Accumulate the current type. + ++Count; + Working = Working << NumBitsPerType | Type; +} + +uint64_t PGOHash::finalize() { + // Use Working as the hash directly if we never used MD5. + if (Count <= NumTypesPerWord) + // No need to byte swap here, since none of the math was endian-dependent. + // This number will be byte-swapped as required on endianness transitions, + // so we will see the same value on the other side. + return Working; + + // Check for remaining work in Working. + if (Working) + MD5.update(Working); + + // Finalize the MD5 and return the hash. 
+ llvm::MD5::MD5Result Result; + MD5.final(Result); + using namespace llvm::support; + return endian::read<uint64_t, little, unaligned>(Result); +} + +static void emitRuntimeHook(CodeGenModule &CGM) { + const char *const RuntimeVarName = "__llvm_profile_runtime"; + const char *const RuntimeUserName = "__llvm_profile_runtime_user"; + if (CGM.getModule().getGlobalVariable(RuntimeVarName)) + return; + + // Declare the runtime hook. + llvm::LLVMContext &Ctx = CGM.getLLVMContext(); + auto *Int32Ty = llvm::Type::getInt32Ty(Ctx); + auto *Var = new llvm::GlobalVariable(CGM.getModule(), Int32Ty, false, + llvm::GlobalValue::ExternalLinkage, + nullptr, RuntimeVarName); + + // Make a function that uses it. + auto *User = llvm::Function::Create(llvm::FunctionType::get(Int32Ty, false), + llvm::GlobalValue::LinkOnceODRLinkage, + RuntimeUserName, &CGM.getModule()); + User->addFnAttr(llvm::Attribute::NoInline); + if (CGM.getCodeGenOpts().DisableRedZone) + User->addFnAttr(llvm::Attribute::NoRedZone); + CGBuilderTy Builder(llvm::BasicBlock::Create(CGM.getLLVMContext(), "", User)); + auto *Load = Builder.CreateLoad(Var); + Builder.CreateRet(Load); + + // Create a use of the function. Now the definition of the runtime variable + // should get pulled in, along with any static initializers. + CGM.addUsedGlobal(User); +} + +void CodeGenPGO::assignRegionCounters(const Decl *D, llvm::Function *Fn) { + bool InstrumentRegions = CGM.getCodeGenOpts().ProfileInstrGenerate; + llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader(); + if (!InstrumentRegions && !PGOReader) + return; + if (D->isImplicit()) + return; + setFuncName(Fn); + + // Set the linkage for variables based on the function linkage. Usually, we + // want to match it, but available_externally and extern_weak both have the + // wrong semantics. 
+ VarLinkage = Fn->getLinkage(); + switch (VarLinkage) { + case llvm::GlobalValue::ExternalWeakLinkage: + VarLinkage = llvm::GlobalValue::LinkOnceAnyLinkage; + break; + case llvm::GlobalValue::AvailableExternallyLinkage: + VarLinkage = llvm::GlobalValue::LinkOnceODRLinkage; + break; + default: + break; + } + + mapRegionCounters(D); + if (InstrumentRegions) { + emitRuntimeHook(CGM); + emitCounterVariables(); + } + if (PGOReader) { + SourceManager &SM = CGM.getContext().getSourceManager(); + loadRegionCounts(PGOReader, SM.isInMainFile(D->getLocation())); + computeRegionCounts(D); + applyFunctionAttributes(PGOReader, Fn); + } +} + +void CodeGenPGO::mapRegionCounters(const Decl *D) { + RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>); + MapRegionCounters Walker(*RegionCounterMap); + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) + Walker.TraverseDecl(const_cast<FunctionDecl *>(FD)); + else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D)) + Walker.TraverseDecl(const_cast<ObjCMethodDecl *>(MD)); + else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D)) + Walker.TraverseDecl(const_cast<BlockDecl *>(BD)); + else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D)) + Walker.TraverseDecl(const_cast<CapturedDecl *>(CD)); + assert(Walker.NextCounter > 0 && "no entry counter mapped for decl"); + NumRegionCounters = Walker.NextCounter; + FunctionHash = Walker.Hash.finalize(); +} + +void CodeGenPGO::computeRegionCounts(const Decl *D) { + StmtCountMap.reset(new llvm::DenseMap<const Stmt *, uint64_t>); + ComputeRegionCounts Walker(*StmtCountMap, *this); + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) + Walker.VisitFunctionDecl(FD); + else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D)) + Walker.VisitObjCMethodDecl(MD); + else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D)) + Walker.VisitBlockDecl(BD); + else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D)) + Walker.VisitCapturedDecl(const_cast<CapturedDecl *>(CD)); +} + +void +CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader, + llvm::Function *Fn) { + if (!haveRegionCounts()) + return; + + uint64_t MaxFunctionCount = PGOReader->getMaximumFunctionCount(); + uint64_t FunctionCount = getRegionCount(0); + if (FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount)) + // Turn on InlineHint attribute for hot functions. + // FIXME: 30% is from preliminary tuning on SPEC, it may not be optimal. + Fn->addFnAttr(llvm::Attribute::InlineHint); + else if (FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount)) + // Turn on Cold attribute for cold functions. + // FIXME: 1% is from preliminary tuning on SPEC, it may not be optimal. 
+ Fn->addFnAttr(llvm::Attribute::Cold); +} + +void CodeGenPGO::emitCounterVariables() { + llvm::LLVMContext &Ctx = CGM.getLLVMContext(); + llvm::ArrayType *CounterTy = llvm::ArrayType::get(llvm::Type::getInt64Ty(Ctx), + NumRegionCounters); + RegionCounters = + new llvm::GlobalVariable(CGM.getModule(), CounterTy, false, VarLinkage, + llvm::Constant::getNullValue(CounterTy), + getFuncVarName("counters")); + RegionCounters->setAlignment(8); + RegionCounters->setSection(getCountersSection(CGM)); +} + +void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter) { + if (!RegionCounters) + return; + llvm::Value *Addr = + Builder.CreateConstInBoundsGEP2_64(RegionCounters, 0, Counter); + llvm::Value *Count = Builder.CreateLoad(Addr, "pgocount"); + Count = Builder.CreateAdd(Count, Builder.getInt64(1)); + Builder.CreateStore(Count, Addr); +} + +void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader, + bool IsInMainFile) { + CGM.getPGOStats().addVisited(IsInMainFile); + RegionCounts.reset(new std::vector<uint64_t>); + uint64_t Hash; + if (PGOReader->getFunctionCounts(getFuncName(), Hash, *RegionCounts)) { + CGM.getPGOStats().addMissing(IsInMainFile); + RegionCounts.reset(); + } else if (Hash != FunctionHash || + RegionCounts->size() != NumRegionCounters) { + CGM.getPGOStats().addMismatched(IsInMainFile); + RegionCounts.reset(); + } +} + +void CodeGenPGO::destroyRegionCounters() { + RegionCounterMap.reset(); + StmtCountMap.reset(); + RegionCounts.reset(); + RegionCounters = nullptr; +} + +/// \brief Calculate what to divide by to scale weights. +/// +/// Given the maximum weight, calculate a divisor that will scale all the +/// weights to strictly less than UINT32_MAX. +static uint64_t calculateWeightScale(uint64_t MaxWeight) { + return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1; +} + +/// \brief Scale an individual branch weight (and add 1). +/// +/// Scale a 64-bit weight down to 32-bits using \c Scale. +/// +/// According to Laplace's Rule of Succession, it is better to compute the +/// weight based on the count plus 1, so universally add 1 to the value. +/// +/// \pre \c Scale was calculated by \a calculateWeightScale() with a weight no +/// greater than \c Weight. +static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) { + assert(Scale && "scale by 0?"); + uint64_t Scaled = Weight / Scale + 1; + assert(Scaled <= UINT32_MAX && "overflow 32-bits"); + return Scaled; +} + +llvm::MDNode *CodeGenPGO::createBranchWeights(uint64_t TrueCount, + uint64_t FalseCount) { + // Check for empty weights. + if (!TrueCount && !FalseCount) + return nullptr; + + // Calculate how to scale down to 32-bits. + uint64_t Scale = calculateWeightScale(std::max(TrueCount, FalseCount)); + + llvm::MDBuilder MDHelper(CGM.getLLVMContext()); + return MDHelper.createBranchWeights(scaleBranchWeight(TrueCount, Scale), + scaleBranchWeight(FalseCount, Scale)); +} + +llvm::MDNode *CodeGenPGO::createBranchWeights(ArrayRef<uint64_t> Weights) { + // We need at least two elements to create meaningful weights. + if (Weights.size() < 2) + return nullptr; + + // Check for empty weights. + uint64_t MaxWeight = *std::max_element(Weights.begin(), Weights.end()); + if (MaxWeight == 0) + return nullptr; + + // Calculate how to scale down to 32-bits. 
+ uint64_t Scale = calculateWeightScale(MaxWeight); + + SmallVector<uint32_t, 16> ScaledWeights; + ScaledWeights.reserve(Weights.size()); + for (uint64_t W : Weights) + ScaledWeights.push_back(scaleBranchWeight(W, Scale)); + + llvm::MDBuilder MDHelper(CGM.getLLVMContext()); + return MDHelper.createBranchWeights(ScaledWeights); +} + +llvm::MDNode *CodeGenPGO::createLoopWeights(const Stmt *Cond, + RegionCounter &Cnt) { + if (!haveRegionCounts()) + return nullptr; + uint64_t LoopCount = Cnt.getCount(); + uint64_t CondCount = 0; + bool Found = getStmtCount(Cond, CondCount); + assert(Found && "missing expected loop condition count"); + (void)Found; + if (CondCount == 0) + return nullptr; + return createBranchWeights(LoopCount, + std::max(CondCount, LoopCount) - LoopCount); +} diff --git a/lib/CodeGen/CodeGenPGO.h b/lib/CodeGen/CodeGenPGO.h new file mode 100644 index 000000000000..2f4aa660bea3 --- /dev/null +++ b/lib/CodeGen/CodeGenPGO.h @@ -0,0 +1,236 @@ +//===--- CodeGenPGO.h - PGO Instrumentation for LLVM CodeGen ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Instrumentation-based profile-guided optimization +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_CODEGEN_CODEGENPGO_H +#define CLANG_CODEGEN_CODEGENPGO_H + +#include "CGBuilder.h" +#include "CodeGenModule.h" +#include "CodeGenTypes.h" +#include "clang/Frontend/CodeGenOptions.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/MemoryBuffer.h" +#include <memory> + +namespace clang { +namespace CodeGen { +class RegionCounter; + +/// Per-function PGO state. This class should generally not be used directly, +/// but instead through the CodeGenFunction and RegionCounter types. +class CodeGenPGO { +private: + CodeGenModule &CGM; + std::unique_ptr<std::string> PrefixedFuncName; + StringRef RawFuncName; + llvm::GlobalValue::LinkageTypes VarLinkage; + + unsigned NumRegionCounters; + uint64_t FunctionHash; + llvm::GlobalVariable *RegionCounters; + std::unique_ptr<llvm::DenseMap<const Stmt *, unsigned>> RegionCounterMap; + std::unique_ptr<llvm::DenseMap<const Stmt *, uint64_t>> StmtCountMap; + std::unique_ptr<std::vector<uint64_t>> RegionCounts; + uint64_t CurrentRegionCount; + +public: + CodeGenPGO(CodeGenModule &CGM) + : CGM(CGM), NumRegionCounters(0), FunctionHash(0), + RegionCounters(nullptr), CurrentRegionCount(0) {} + + /// Whether or not we have PGO region data for the current function. This is + /// false both when we have no data at all and when our data has been + /// discarded. + bool haveRegionCounts() const { return RegionCounts != nullptr; } + + /// Get the string used to identify this function in the profile data. + /// For functions with local linkage, this includes the main file name. + StringRef getFuncName() const { return StringRef(*PrefixedFuncName); } + std::string getFuncVarName(StringRef VarName) const { + return ("__llvm_profile_" + VarName + "_" + RawFuncName).str(); + } + + /// Return the counter value of the current region. + uint64_t getCurrentRegionCount() const { return CurrentRegionCount; } + + /// Set the counter value for the current region. This is used to keep track + /// of changes to the most recent counter from control flow and non-local + /// exits. 
+ void setCurrentRegionCount(uint64_t Count) { CurrentRegionCount = Count; } + + /// Indicate that the current region is never reached, and thus should have a + /// counter value of zero. This is important so that subsequent regions can + /// correctly track their parent counts. + void setCurrentRegionUnreachable() { setCurrentRegionCount(0); } + + /// Check if an execution count is known for a given statement. If so, return + /// true and put the value in Count; else return false. + bool getStmtCount(const Stmt *S, uint64_t &Count) { + if (!StmtCountMap) + return false; + llvm::DenseMap<const Stmt*, uint64_t>::const_iterator + I = StmtCountMap->find(S); + if (I == StmtCountMap->end()) + return false; + Count = I->second; + return true; + } + + /// If the execution count for the current statement is known, record that + /// as the current count. + void setCurrentStmt(const Stmt *S) { + uint64_t Count; + if (getStmtCount(S, Count)) + setCurrentRegionCount(Count); + } + + /// Calculate branch weights appropriate for PGO data + llvm::MDNode *createBranchWeights(uint64_t TrueCount, uint64_t FalseCount); + llvm::MDNode *createBranchWeights(ArrayRef<uint64_t> Weights); + llvm::MDNode *createLoopWeights(const Stmt *Cond, RegionCounter &Cnt); + + /// Assign counters to regions and configure them for PGO of a given + /// function. Does nothing if instrumentation is not enabled and either + /// generates global variables or associates PGO data with each of the + /// counters depending on whether we are generating or using instrumentation. + void assignRegionCounters(const Decl *D, llvm::Function *Fn); + /// Emit static data structures for instrumentation data. + void emitInstrumentationData(); + /// Clean up region counter state. Must be called if assignRegionCounters is + /// used. + void destroyRegionCounters(); + /// Emit static initialization code, if any. + static llvm::Function *emitInitialization(CodeGenModule &CGM); + +private: + void setFuncName(llvm::Function *Fn); + void mapRegionCounters(const Decl *D); + void computeRegionCounts(const Decl *D); + void applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader, + llvm::Function *Fn); + void loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader, + bool IsInMainFile); + void emitCounterVariables(); + llvm::GlobalVariable *buildDataVar(); + + /// Emit code to increment the counter at the given index + void emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter); + + /// Return the region counter for the given statement. This should only be + /// called on statements that have a dedicated counter. + unsigned getRegionCounter(const Stmt *S) { + if (!RegionCounterMap) + return 0; + return (*RegionCounterMap)[S]; + } + + /// Return the region count for the counter at the given index. + uint64_t getRegionCount(unsigned Counter) { + if (!haveRegionCounts()) + return 0; + return (*RegionCounts)[Counter]; + } + + friend class RegionCounter; +}; + +/// A counter for a particular region. This is the primary interface through +/// which clients manage PGO counters and their values. 
+class RegionCounter { + CodeGenPGO *PGO; + unsigned Counter; + uint64_t Count; + uint64_t ParentCount; + uint64_t RegionCount; + int64_t Adjust; + + RegionCounter(CodeGenPGO &PGO, unsigned CounterIndex) + : PGO(&PGO), Counter(CounterIndex), Count(PGO.getRegionCount(Counter)), + ParentCount(PGO.getCurrentRegionCount()), Adjust(0) {} + +public: + RegionCounter(CodeGenPGO &PGO, const Stmt *S) + : PGO(&PGO), Counter(PGO.getRegionCounter(S)), + Count(PGO.getRegionCount(Counter)), + ParentCount(PGO.getCurrentRegionCount()), Adjust(0) {} + + /// Get the value of the counter. In most cases this is the number of times + /// the region of the counter was entered, but for switch labels it's the + /// number of direct jumps to that label. + uint64_t getCount() const { return Count; } + + /// Get the value of the counter with adjustments applied. Adjustments occur + /// when control enters or leaves the region abnormally; i.e., if there is a + /// jump to a label within the region, or if the function can return from + /// within the region. The adjusted count, then, is the value of the counter + /// at the end of the region. + uint64_t getAdjustedCount() const { + return Count + Adjust; + } + + /// Get the value of the counter in this region's parent, i.e., the region + /// that was active when this region began. This is useful for deriving + /// counts in implicitly counted regions, like the false case of a condition + /// or the normal exits of a loop. + uint64_t getParentCount() const { return ParentCount; } + + /// Activate the counter by emitting an increment and starting to track + /// adjustments. If AddIncomingFallThrough is true, the current region count + /// will be added to the counter for the purposes of tracking the region. + void beginRegion(CGBuilderTy &Builder, bool AddIncomingFallThrough=false) { + beginRegion(AddIncomingFallThrough); + PGO->emitCounterIncrement(Builder, Counter); + } + void beginRegion(bool AddIncomingFallThrough=false) { + RegionCount = Count; + if (AddIncomingFallThrough) + RegionCount += PGO->getCurrentRegionCount(); + PGO->setCurrentRegionCount(RegionCount); + } + + /// For counters on boolean branches, begins tracking adjustments for the + /// uncounted path. + void beginElseRegion() { + RegionCount = ParentCount - Count; + PGO->setCurrentRegionCount(RegionCount); + } + + /// Reset the current region count. + void setCurrentRegionCount(uint64_t CurrentCount) { + RegionCount = CurrentCount; + PGO->setCurrentRegionCount(RegionCount); + } + + /// Adjust for non-local control flow after emitting a subexpression or + /// substatement. This must be called to account for constructs such as gotos, + /// labels, and returns, so that we can ensure that our region's count is + /// correct in the code that follows. + void adjustForControlFlow() { + Adjust += PGO->getCurrentRegionCount() - RegionCount; + // Reset the region count in case this is called again later. + RegionCount = PGO->getCurrentRegionCount(); + } + + /// Commit all adjustments to the current region. If the region is a loop, + /// the LoopAdjust value should be the count of all the breaks and continues + /// from the loop, to compensate for those counts being deducted from the + /// adjustments for the body of the loop. 
+ void applyAdjustmentsToRegion(uint64_t LoopAdjust) { + PGO->setCurrentRegionCount(ParentCount + Adjust + LoopAdjust); + } +}; + +} // end namespace CodeGen +} // end namespace clang + +#endif diff --git a/lib/CodeGen/CodeGenTBAA.cpp b/lib/CodeGen/CodeGenTBAA.cpp index 699cc2eabe18..53ba02a81397 100644 --- a/lib/CodeGen/CodeGenTBAA.cpp +++ b/lib/CodeGen/CodeGenTBAA.cpp @@ -33,7 +33,7 @@ CodeGenTBAA::CodeGenTBAA(ASTContext &Ctx, llvm::LLVMContext& VMContext, const CodeGenOptions &CGO, const LangOptions &Features, MangleContext &MContext) : Context(Ctx), CodeGenOpts(CGO), Features(Features), MContext(MContext), - MDHelper(VMContext), Root(0), Char(0) { + MDHelper(VMContext), Root(nullptr), Char(nullptr) { } CodeGenTBAA::~CodeGenTBAA() { @@ -88,7 +88,7 @@ llvm::MDNode * CodeGenTBAA::getTBAAInfo(QualType QTy) { // At -O0 or relaxed aliasing, TBAA is not emitted for regular types. if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing) - return NULL; + return nullptr; // If the type has the may_alias attribute (even on a typedef), it is // effectively in the general char alias class. @@ -221,7 +221,7 @@ CodeGenTBAA::getTBAAStructInfo(QualType QTy) { return MDHelper.createTBAAStructNode(Fields); // For now, handle any other kind of type conservatively. - return StructMetadataCache[Ty] = NULL; + return StructMetadataCache[Ty] = nullptr; } /// Check if the given type can be handled by path-aware TBAA. @@ -261,7 +261,7 @@ CodeGenTBAA::getTBAAStructTypeInfo(QualType QTy) { else FieldNode = getTBAAInfo(FieldQTy); if (!FieldNode) - return StructTypeMetadataCache[Ty] = NULL; + return StructTypeMetadataCache[Ty] = nullptr; Fields.push_back(std::make_pair( FieldNode, Layout.getFieldOffset(idx) / Context.getCharWidth())); } @@ -280,7 +280,7 @@ CodeGenTBAA::getTBAAStructTypeInfo(QualType QTy) { MDHelper.createTBAAStructTypeNode(OutName, Fields); } - return StructMetadataCache[Ty] = NULL; + return StructMetadataCache[Ty] = nullptr; } /// Return a TBAA tag node for both scalar TBAA and struct-path aware TBAA. 
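(A worked example for the loop bookkeeping in ComputeRegionCounts above, with made-up counts: suppose a while loop is entered 3 times, its body counter records 10 iterations, and nested counters attribute 2 of those iterations to continue and 1 to break. After Visit(S->getBody()), the adjusted body count is 10 - 2 - 1 = 7, so the condition is counted as parent 3 + backedge 7 + continue 2 = 12 executions; of those, 10 re-enter the body and 2 exit on a false condition, which together with the single break gives 3 exits, matching the 3 entries. applyAdjustmentsToRegion(BreakCount + ContinueCount) then leaves the region following the loop at that same count of 3.)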
@@ -288,7 +288,7 @@ llvm::MDNode * CodeGenTBAA::getTBAAStructTagInfo(QualType BaseQTy, llvm::MDNode *AccessNode, uint64_t Offset) { if (!AccessNode) - return NULL; + return nullptr; if (!CodeGenOpts.StructPathTBAA) return getTBAAScalarTagInfo(AccessNode); @@ -298,7 +298,7 @@ CodeGenTBAA::getTBAAStructTagInfo(QualType BaseQTy, llvm::MDNode *AccessNode, if (llvm::MDNode *N = StructTagMetadataCache[PathTag]) return N; - llvm::MDNode *BNode = 0; + llvm::MDNode *BNode = nullptr; if (isTBAAPathStruct(BaseQTy)) BNode = getTBAAStructTypeInfo(BaseQTy); if (!BNode) @@ -312,7 +312,7 @@ CodeGenTBAA::getTBAAStructTagInfo(QualType BaseQTy, llvm::MDNode *AccessNode, llvm::MDNode * CodeGenTBAA::getTBAAScalarTagInfo(llvm::MDNode *AccessNode) { if (!AccessNode) - return NULL; + return nullptr; if (llvm::MDNode *N = ScalarTagMetadataCache[AccessNode]) return N; diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp index 5f3c59c197a6..d4e22623e029 100644 --- a/lib/CodeGen/CodeGenTypes.cpp +++ b/lib/CodeGen/CodeGenTypes.cpp @@ -38,10 +38,7 @@ CodeGenTypes::CodeGenTypes(CodeGenModule &cgm) } CodeGenTypes::~CodeGenTypes() { - for (llvm::DenseMap<const Type *, CGRecordLayout *>::iterator - I = CGRecordLayouts.begin(), E = CGRecordLayouts.end(); - I != E; ++I) - delete I->second; + llvm::DeleteContainerSeconds(CGRecordLayouts); for (llvm::FoldingSet<CGFunctionInfo>::iterator I = FunctionInfos.begin(), E = FunctionInfos.end(); I != E; ) @@ -134,17 +131,15 @@ isSafeToConvert(const RecordDecl *RD, CodeGenTypes &CGT, // when a class is translated, even though they aren't embedded by-value into // the class. if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) { - for (CXXRecordDecl::base_class_const_iterator I = CRD->bases_begin(), - E = CRD->bases_end(); I != E; ++I) - if (!isSafeToConvert(I->getType()->getAs<RecordType>()->getDecl(), + for (const auto &I : CRD->bases()) + if (!isSafeToConvert(I.getType()->getAs<RecordType>()->getDecl(), CGT, AlreadyChecked)) return false; } // If this type would require laying out members that are currently being laid // out, don't do it. - for (RecordDecl::field_iterator I = RD->field_begin(), - E = RD->field_end(); I != E; ++I) + for (const auto *I : RD->fields()) if (!isSafeToConvert(I->getType(), CGT, AlreadyChecked)) return false; @@ -186,24 +181,23 @@ static bool isSafeToConvert(const RecordDecl *RD, CodeGenTypes &CGT) { return isSafeToConvert(RD, CGT, AlreadyChecked); } - -/// isFuncTypeArgumentConvertible - Return true if the specified type in a -/// function argument or result position can be converted to an IR type at this +/// isFuncParamTypeConvertible - Return true if the specified type in a +/// function parameter or result position can be converted to an IR type at this /// point. This boils down to being whether it is complete, as well as whether /// we've temporarily deferred expanding the type because we're in a recursive /// context. -bool CodeGenTypes::isFuncTypeArgumentConvertible(QualType Ty) { +bool CodeGenTypes::isFuncParamTypeConvertible(QualType Ty) { // If this isn't a tagged type, we can convert it! const TagType *TT = Ty->getAs<TagType>(); - if (TT == 0) return true; - + if (!TT) return true; + // Incomplete types cannot be converted. if (TT->isIncompleteType()) return false; // If this is an enum, then it is always safe to convert. const RecordType *RT = dyn_cast<RecordType>(TT); - if (RT == 0) return true; + if (!RT) return true; // Otherwise, we have to be careful. 
If it is a struct that we're in the // process of expanding, then we can't convert the function type. That's ok @@ -217,17 +211,17 @@ bool CodeGenTypes::isFuncTypeArgumentConvertible(QualType Ty) { /// Code to verify a given function type is complete, i.e. the return type -/// and all of the argument types are complete. Also check to see if we are in +/// and all of the parameter types are complete. Also check to see if we are in /// a RS_StructPointer context, and if so whether any struct types have been /// pended. If so, we don't want to ask the ABI lowering code to handle a type /// that cannot be converted to an IR type. bool CodeGenTypes::isFuncTypeConvertible(const FunctionType *FT) { - if (!isFuncTypeArgumentConvertible(FT->getResultType())) + if (!isFuncParamTypeConvertible(FT->getReturnType())) return false; if (const FunctionProtoType *FPT = dyn_cast<FunctionProtoType>(FT)) - for (unsigned i = 0, e = FPT->getNumArgs(); i != e; i++) - if (!isFuncTypeArgumentConvertible(FPT->getArgType(i))) + for (unsigned i = 0, e = FPT->getNumParams(); i != e; i++) + if (!isFuncParamTypeConvertible(FPT->getParamType(i))) return false; return true; @@ -248,6 +242,10 @@ void CodeGenTypes::UpdateCompletedType(const TagDecl *TD) { if (!ConvertType(ED->getIntegerType())->isIntegerTy(32)) TypeCache.clear(); } + // If necessary, provide the full definition of a type only used with a + // declaration so far. + if (CGDebugInfo *DI = CGM.getModuleDebugInfo()) + DI->completeType(ED); return; } @@ -306,7 +304,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { return TCI->second; // If we don't have it in the cache, convert it now. - llvm::Type *ResultType = 0; + llvm::Type *ResultType = nullptr; switch (Ty->getTypeClass()) { case Type::Record: // Handled above. #define TYPE(Class, Base) @@ -479,11 +477,11 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { // Force conversion of all the relevant record types, to make sure // we re-convert the FunctionType when appropriate. - if (const RecordType *RT = FT->getResultType()->getAs<RecordType>()) + if (const RecordType *RT = FT->getReturnType()->getAs<RecordType>()) ConvertRecordDeclType(RT->getDecl()); if (const FunctionProtoType *FPT = dyn_cast<FunctionProtoType>(FT)) - for (unsigned i = 0, e = FPT->getNumArgs(); i != e; i++) - if (const RecordType *RT = FPT->getArgType(i)->getAs<RecordType>()) + for (unsigned i = 0, e = FPT->getNumParams(); i != e; i++) + if (const RecordType *RT = FPT->getParamType(i)->getAs<RecordType>()) ConvertRecordDeclType(RT->getDecl()); // Return a placeholder type. @@ -493,7 +491,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { break; } - // While we're converting the argument types for a function, we don't want + // While we're converting the parameter types for a function, we don't want // to recursively convert any pointed-to structs. Converting directly-used // structs is ok though. if (!RecordsBeingLaidOut.insert(Ty)) { @@ -631,7 +629,7 @@ llvm::StructType *CodeGenTypes::ConvertRecordDeclType(const RecordDecl *RD) { llvm::StructType *&Entry = RecordDeclTypes[Key]; // If we don't have a StructType at all yet, create the forward declaration. - if (Entry == 0) { + if (!Entry) { Entry = llvm::StructType::create(getLLVMContext()); addRecordTypeName(RD, Entry, ""); } @@ -640,7 +638,7 @@ llvm::StructType *CodeGenTypes::ConvertRecordDeclType(const RecordDecl *RD) { // If this is still a forward declaration, or the LLVM type is already // complete, there's nothing more to do. 
RD = RD->getDefinition(); - if (RD == 0 || !RD->isCompleteDefinition() || !Ty->isOpaque()) + if (!RD || !RD->isCompleteDefinition() || !Ty->isOpaque()) return Ty; // If converting this type would cause us to infinitely loop, don't do it! @@ -655,11 +653,10 @@ llvm::StructType *CodeGenTypes::ConvertRecordDeclType(const RecordDecl *RD) { // Force conversion of non-virtual base classes recursively. if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) { - for (CXXRecordDecl::base_class_const_iterator i = CRD->bases_begin(), - e = CRD->bases_end(); i != e; ++i) { - if (i->isVirtual()) continue; + for (const auto &I : CRD->bases()) { + if (I.isVirtual()) continue; - ConvertRecordDeclType(i->getType()->getAs<RecordType>()->getDecl()); + ConvertRecordDeclType(I.getType()->getAs<RecordType>()->getDecl()); } } diff --git a/lib/CodeGen/CodeGenTypes.h b/lib/CodeGen/CodeGenTypes.h index 94ca9e21e5fe..fe155b53b3c4 100644 --- a/lib/CodeGen/CodeGenTypes.h +++ b/lib/CodeGen/CodeGenTypes.h @@ -104,7 +104,7 @@ class CodeGenTypes { private: /// TypeCache - This map keeps cache of llvm::Types - /// and maps llvm::Types to corresponding clang::Type. + /// and maps clang::Type to corresponding llvm::Type. llvm::DenseMap<const Type *, llvm::Type *> TypeCache; public: @@ -136,8 +136,8 @@ public: /// be converted to an LLVM type (i.e. doesn't depend on an incomplete tag /// type). bool isFuncTypeConvertible(const FunctionType *FT); - bool isFuncTypeArgumentConvertible(QualType Ty); - + bool isFuncParamTypeConvertible(QualType Ty); + /// GetFunctionTypeForVTable - Get the LLVM function type for use in a vtable, /// given a CXXMethodDecl. If the method to has an incomplete return type, /// and/or incomplete argument types, this will return the opaque type. @@ -175,10 +175,10 @@ public: const CGFunctionInfo &arrangeGlobalDeclaration(GlobalDecl GD); const CGFunctionInfo &arrangeFunctionDeclaration(const FunctionDecl *FD); - const CGFunctionInfo &arrangeFunctionDeclaration(QualType ResTy, - const FunctionArgList &Args, - const FunctionType::ExtInfo &Info, - bool isVariadic); + const CGFunctionInfo & + arrangeFreeFunctionDeclaration(QualType ResTy, const FunctionArgList &Args, + const FunctionType::ExtInfo &Info, + bool isVariadic); const CGFunctionInfo &arrangeObjCMethodDeclaration(const ObjCMethodDecl *MD); const CGFunctionInfo &arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, @@ -188,6 +188,10 @@ public: const CGFunctionInfo &arrangeCXXConstructorDeclaration( const CXXConstructorDecl *D, CXXCtorType Type); + const CGFunctionInfo &arrangeCXXConstructorCall(const CallArgList &Args, + const CXXConstructorDecl *D, + CXXCtorType CtorKind, + unsigned ExtraArgs); const CGFunctionInfo &arrangeCXXDestructor(const CXXDestructorDecl *D, CXXDtorType Type); @@ -216,6 +220,7 @@ public: /// /// \param argTypes - must all actually be canonical as params const CGFunctionInfo &arrangeLLVMFunctionInfo(CanQualType returnType, + bool IsInstanceMethod, ArrayRef<CanQualType> argTypes, FunctionType::ExtInfo info, RequiredArgs args); diff --git a/lib/CodeGen/EHScopeStack.h b/lib/CodeGen/EHScopeStack.h index e9d9a339ffc7..b9ccfb63d7d5 100644 --- a/lib/CodeGen/EHScopeStack.h +++ b/lib/CodeGen/EHScopeStack.h @@ -19,8 +19,8 @@ #include "clang/Basic/LLVM.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Value.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Value.h" namespace clang { namespace CodeGen { @@ -65,9 +65,9 @@ template <class T> struct InvariantValue { template <class T> 
struct DominatingValue : InvariantValue<T> {}; template <class T, bool mightBeInstruction = - llvm::is_base_of<llvm::Value, T>::value && - !llvm::is_base_of<llvm::Constant, T>::value && - !llvm::is_base_of<llvm::BasicBlock, T>::value> + std::is_base_of<llvm::Value, T>::value && + !std::is_base_of<llvm::Constant, T>::value && + !std::is_base_of<llvm::BasicBlock, T>::value> struct DominatingPointer; template <class T> struct DominatingPointer<T,false> : InvariantValue<T*> {}; // template <class T> struct DominatingPointer<T,true> at end of file @@ -182,7 +182,7 @@ public: typedef typename DominatingValue<A0>::saved_type A0_saved; A0_saved a0_saved; - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { A0 a0 = DominatingValue<A0>::restore(CGF, a0_saved); T(a0).Emit(CGF, flags); } @@ -199,7 +199,7 @@ public: A0_saved a0_saved; A1_saved a1_saved; - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { A0 a0 = DominatingValue<A0>::restore(CGF, a0_saved); A1 a1 = DominatingValue<A1>::restore(CGF, a1_saved); T(a0, a1).Emit(CGF, flags); @@ -219,7 +219,7 @@ public: A1_saved a1_saved; A2_saved a2_saved; - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { A0 a0 = DominatingValue<A0>::restore(CGF, a0_saved); A1 a1 = DominatingValue<A1>::restore(CGF, a1_saved); A2 a2 = DominatingValue<A2>::restore(CGF, a2_saved); @@ -242,7 +242,7 @@ public: A2_saved a2_saved; A3_saved a3_saved; - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { A0 a0 = DominatingValue<A0>::restore(CGF, a0_saved); A1 a1 = DominatingValue<A1>::restore(CGF, a1_saved); A2 a2 = DominatingValue<A2>::restore(CGF, a2_saved); @@ -301,8 +301,8 @@ private: void *pushCleanup(CleanupKind K, size_t DataSize); public: - EHScopeStack() : StartOfBuffer(0), EndOfBuffer(0), StartOfData(0), - InnermostNormalCleanup(stable_end()), + EHScopeStack() : StartOfBuffer(nullptr), EndOfBuffer(nullptr), + StartOfData(nullptr), InnermostNormalCleanup(stable_end()), InnermostEHScope(stable_end()) {} ~EHScopeStack() { delete[] StartOfBuffer; } diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp index 0e8f31a48454..d7e61f0fe577 100644 --- a/lib/CodeGen/ItaniumCXXABI.cpp +++ b/lib/CodeGen/ItaniumCXXABI.cpp @@ -25,6 +25,7 @@ #include "CodeGenModule.h" #include "clang/AST/Mangle.h" #include "clang/AST/Type.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Value.h" @@ -52,128 +53,154 @@ public: CGCXXABI(CGM), UseARMMethodPtrABI(UseARMMethodPtrABI), UseARMGuardVarABI(UseARMGuardVarABI) { } - bool isReturnTypeIndirect(const CXXRecordDecl *RD) const { - // Structures with either a non-trivial destructor or a non-trivial - // copy constructor are always indirect. - return !RD->hasTrivialDestructor() || RD->hasNonTrivialCopyConstructor(); - } + bool classifyReturnType(CGFunctionInfo &FI) const override; - RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const { + RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const override { // Structures with either a non-trivial destructor or a non-trivial // copy constructor are always indirect. - if (!RD->hasTrivialDestructor() || RD->hasNonTrivialCopyConstructor()) + // FIXME: Use canCopyArgument() when it is fixed to handle lazily declared + // special members. 
+ if (RD->hasNonTrivialDestructor() || RD->hasNonTrivialCopyConstructor()) return RAA_Indirect; return RAA_Default; } - bool isZeroInitializable(const MemberPointerType *MPT); + bool isZeroInitializable(const MemberPointerType *MPT) override; - llvm::Type *ConvertMemberPointerType(const MemberPointerType *MPT); + llvm::Type *ConvertMemberPointerType(const MemberPointerType *MPT) override; - llvm::Value *EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF, - llvm::Value *&This, - llvm::Value *MemFnPtr, - const MemberPointerType *MPT); + llvm::Value * + EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF, + const Expr *E, + llvm::Value *&This, + llvm::Value *MemFnPtr, + const MemberPointerType *MPT) override; - llvm::Value *EmitMemberDataPointerAddress(CodeGenFunction &CGF, - llvm::Value *Base, - llvm::Value *MemPtr, - const MemberPointerType *MPT); + llvm::Value * + EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E, + llvm::Value *Base, + llvm::Value *MemPtr, + const MemberPointerType *MPT) override; llvm::Value *EmitMemberPointerConversion(CodeGenFunction &CGF, const CastExpr *E, - llvm::Value *Src); + llvm::Value *Src) override; llvm::Constant *EmitMemberPointerConversion(const CastExpr *E, - llvm::Constant *Src); + llvm::Constant *Src) override; - llvm::Constant *EmitNullMemberPointer(const MemberPointerType *MPT); + llvm::Constant *EmitNullMemberPointer(const MemberPointerType *MPT) override; - llvm::Constant *EmitMemberPointer(const CXXMethodDecl *MD); + llvm::Constant *EmitMemberPointer(const CXXMethodDecl *MD) override; llvm::Constant *EmitMemberDataPointer(const MemberPointerType *MPT, - CharUnits offset); - llvm::Constant *EmitMemberPointer(const APValue &MP, QualType MPT); + CharUnits offset) override; + llvm::Constant *EmitMemberPointer(const APValue &MP, QualType MPT) override; llvm::Constant *BuildMemberPointer(const CXXMethodDecl *MD, CharUnits ThisAdjustment); llvm::Value *EmitMemberPointerComparison(CodeGenFunction &CGF, - llvm::Value *L, - llvm::Value *R, + llvm::Value *L, llvm::Value *R, const MemberPointerType *MPT, - bool Inequality); + bool Inequality) override; llvm::Value *EmitMemberPointerIsNotNull(CodeGenFunction &CGF, - llvm::Value *Addr, - const MemberPointerType *MPT); + llvm::Value *Addr, + const MemberPointerType *MPT) override; - llvm::Value *adjustToCompleteObject(CodeGenFunction &CGF, - llvm::Value *ptr, - QualType type); + llvm::Value *adjustToCompleteObject(CodeGenFunction &CGF, llvm::Value *ptr, + QualType type) override; - llvm::Value *GetVirtualBaseClassOffset(CodeGenFunction &CGF, - llvm::Value *This, - const CXXRecordDecl *ClassDecl, - const CXXRecordDecl *BaseClassDecl); + void EmitFundamentalRTTIDescriptor(QualType Type); + void EmitFundamentalRTTIDescriptors(); + llvm::Constant *getAddrOfRTTIDescriptor(QualType Ty) override; + + bool shouldTypeidBeNullChecked(bool IsDeref, QualType SrcRecordTy) override; + void EmitBadTypeidCall(CodeGenFunction &CGF) override; + llvm::Value *EmitTypeid(CodeGenFunction &CGF, QualType SrcRecordTy, + llvm::Value *ThisPtr, + llvm::Type *StdTypeInfoPtrTy) override; + + bool shouldDynamicCastCallBeNullChecked(bool SrcIsPtr, + QualType SrcRecordTy) override; + + llvm::Value *EmitDynamicCastCall(CodeGenFunction &CGF, llvm::Value *Value, + QualType SrcRecordTy, QualType DestTy, + QualType DestRecordTy, + llvm::BasicBlock *CastEnd) override; + + llvm::Value *EmitDynamicCastToVoid(CodeGenFunction &CGF, llvm::Value *Value, + QualType SrcRecordTy, + QualType DestTy) override; + + bool 
EmitBadCastCall(CodeGenFunction &CGF) override; + + llvm::Value * + GetVirtualBaseClassOffset(CodeGenFunction &CGF, llvm::Value *This, + const CXXRecordDecl *ClassDecl, + const CXXRecordDecl *BaseClassDecl) override; void BuildConstructorSignature(const CXXConstructorDecl *Ctor, - CXXCtorType T, - CanQualType &ResTy, - SmallVectorImpl<CanQualType> &ArgTys); + CXXCtorType T, CanQualType &ResTy, + SmallVectorImpl<CanQualType> &ArgTys) override; - void EmitCXXConstructors(const CXXConstructorDecl *D); + void EmitCXXConstructors(const CXXConstructorDecl *D) override; void BuildDestructorSignature(const CXXDestructorDecl *Dtor, - CXXDtorType T, - CanQualType &ResTy, - SmallVectorImpl<CanQualType> &ArgTys); + CXXDtorType T, CanQualType &ResTy, + SmallVectorImpl<CanQualType> &ArgTys) override; bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor, - CXXDtorType DT) const { + CXXDtorType DT) const override { // Itanium does not emit any destructor variant as an inline thunk. // Delegating may occur as an optimization, but all variants are either // emitted with external linkage or as linkonce if they are inline and used. return false; } - void EmitCXXDestructors(const CXXDestructorDecl *D); + void EmitCXXDestructors(const CXXDestructorDecl *D) override; + + void addImplicitStructorParams(CodeGenFunction &CGF, QualType &ResTy, + FunctionArgList &Params) override; - void BuildInstanceFunctionParams(CodeGenFunction &CGF, - QualType &ResTy, - FunctionArgList &Params); + void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override; - void EmitInstanceFunctionProlog(CodeGenFunction &CGF); + unsigned addImplicitConstructorArgs(CodeGenFunction &CGF, + const CXXConstructorDecl *D, + CXXCtorType Type, bool ForVirtualBase, + bool Delegating, + CallArgList &Args) override; - void EmitConstructorCall(CodeGenFunction &CGF, - const CXXConstructorDecl *D, CXXCtorType Type, - bool ForVirtualBase, bool Delegating, - llvm::Value *This, - CallExpr::const_arg_iterator ArgBeg, - CallExpr::const_arg_iterator ArgEnd); + void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD, + CXXDtorType Type, bool ForVirtualBase, + bool Delegating, llvm::Value *This) override; - void emitVTableDefinitions(CodeGenVTables &CGVT, const CXXRecordDecl *RD); + void emitVTableDefinitions(CodeGenVTables &CGVT, + const CXXRecordDecl *RD) override; llvm::Value *getVTableAddressPointInStructor( CodeGenFunction &CGF, const CXXRecordDecl *VTableClass, BaseSubobject Base, const CXXRecordDecl *NearestVBase, - bool &NeedsVirtualOffset); + bool &NeedsVirtualOffset) override; llvm::Constant * getVTableAddressPointForConstExpr(BaseSubobject Base, - const CXXRecordDecl *VTableClass); + const CXXRecordDecl *VTableClass) override; llvm::GlobalVariable *getAddrOfVTable(const CXXRecordDecl *RD, - CharUnits VPtrOffset); + CharUnits VPtrOffset) override; llvm::Value *getVirtualFunctionPointer(CodeGenFunction &CGF, GlobalDecl GD, - llvm::Value *This, llvm::Type *Ty); + llvm::Value *This, + llvm::Type *Ty) override; void EmitVirtualDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *Dtor, CXXDtorType DtorType, SourceLocation CallLoc, - llvm::Value *This); + llvm::Value *This) override; - void emitVirtualInheritanceTables(const CXXRecordDecl *RD); + void emitVirtualInheritanceTables(const CXXRecordDecl *RD) override; - void setThunkLinkage(llvm::Function *Thunk, bool ForVTable) { + void setThunkLinkage(llvm::Function *Thunk, bool ForVTable, GlobalDecl GD, + bool ReturnAdjustment) override { // Allow inlining of thunks by emitting 
them with available_externally // linkage together with vtables when needed. if (ForVTable) @@ -181,38 +208,71 @@ public: } llvm::Value *performThisAdjustment(CodeGenFunction &CGF, llvm::Value *This, - const ThisAdjustment &TA); + const ThisAdjustment &TA) override; llvm::Value *performReturnAdjustment(CodeGenFunction &CGF, llvm::Value *Ret, - const ReturnAdjustment &RA); + const ReturnAdjustment &RA) override; - StringRef GetPureVirtualCallName() { return "__cxa_pure_virtual"; } - StringRef GetDeletedVirtualCallName() { return "__cxa_deleted_virtual"; } + StringRef GetPureVirtualCallName() override { return "__cxa_pure_virtual"; } + StringRef GetDeletedVirtualCallName() override + { return "__cxa_deleted_virtual"; } - CharUnits getArrayCookieSizeImpl(QualType elementType); + CharUnits getArrayCookieSizeImpl(QualType elementType) override; llvm::Value *InitializeArrayCookie(CodeGenFunction &CGF, llvm::Value *NewPtr, llvm::Value *NumElements, const CXXNewExpr *expr, - QualType ElementType); + QualType ElementType) override; llvm::Value *readArrayCookieImpl(CodeGenFunction &CGF, llvm::Value *allocPtr, - CharUnits cookieSize); + CharUnits cookieSize) override; void EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, - llvm::GlobalVariable *DeclPtr, bool PerformInit); + llvm::GlobalVariable *DeclPtr, + bool PerformInit) override; void registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, - llvm::Constant *dtor, llvm::Constant *addr); + llvm::Constant *dtor, llvm::Constant *addr) override; llvm::Function *getOrCreateThreadLocalWrapper(const VarDecl *VD, llvm::GlobalVariable *Var); void EmitThreadLocalInitFuncs( - llvm::ArrayRef<std::pair<const VarDecl *, llvm::GlobalVariable *> > Decls, - llvm::Function *InitFunc); - LValue EmitThreadLocalDeclRefExpr(CodeGenFunction &CGF, - const DeclRefExpr *DRE); + ArrayRef<std::pair<const VarDecl *, llvm::GlobalVariable *> > Decls, + llvm::Function *InitFunc) override; + LValue EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD, + QualType LValType) override; + + bool NeedsVTTParameter(GlobalDecl GD) override; + + /**************************** RTTI Uniqueness ******************************/ + +protected: + /// Returns true if the ABI requires RTTI type_info objects to be unique + /// across a program. + virtual bool shouldRTTIBeUnique() const { return true; } + +public: + /// What sort of unique-RTTI behavior should we use? + enum RTTIUniquenessKind { + /// We are guaranteeing, or need to guarantee, that the RTTI string + /// is unique. + RUK_Unique, + + /// We are not guaranteeing uniqueness for the RTTI string, so we + /// can demote to hidden visibility but must use string comparisons. + RUK_NonUniqueHidden, + + /// We are not guaranteeing uniqueness for the RTTI string, so we + /// have to use string comparisons, but we also have to emit it with + /// non-hidden visibility. + RUK_NonUniqueVisible + }; - bool NeedsVTTParameter(GlobalDecl GD); + /// Return the required visibility status for the given type and linkage in + /// the current ABI. 
+ RTTIUniquenessKind + classifyRTTIUniqueness(QualType CanTy, + llvm::GlobalValue::LinkageTypes Linkage) const; + friend class ItaniumRTTIBuilder; }; class ARMCXXABI : public ItaniumCXXABI { @@ -221,22 +281,31 @@ public: ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true, /* UseARMGuardVarABI = */ true) {} - bool HasThisReturn(GlobalDecl GD) const { + bool HasThisReturn(GlobalDecl GD) const override { return (isa<CXXConstructorDecl>(GD.getDecl()) || ( isa<CXXDestructorDecl>(GD.getDecl()) && GD.getDtorType() != Dtor_Deleting)); } - void EmitReturnFromThunk(CodeGenFunction &CGF, RValue RV, QualType ResTy); + void EmitReturnFromThunk(CodeGenFunction &CGF, RValue RV, + QualType ResTy) override; - CharUnits getArrayCookieSizeImpl(QualType elementType); + CharUnits getArrayCookieSizeImpl(QualType elementType) override; llvm::Value *InitializeArrayCookie(CodeGenFunction &CGF, llvm::Value *NewPtr, llvm::Value *NumElements, const CXXNewExpr *expr, - QualType ElementType); + QualType ElementType) override; llvm::Value *readArrayCookieImpl(CodeGenFunction &CGF, llvm::Value *allocPtr, - CharUnits cookieSize); + CharUnits cookieSize) override; +}; + +class iOS64CXXABI : public ARMCXXABI { +public: + iOS64CXXABI(CodeGen::CodeGenModule &CGM) : ARMCXXABI(CGM) {} + + // ARM64 libraries are prepared for non-unique RTTI. + bool shouldRTTIBeUnique() const override { return false; } }; } @@ -248,6 +317,9 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) { case TargetCXXABI::iOS: return new ARMCXXABI(CGM); + case TargetCXXABI::iOS64: + return new iOS64CXXABI(CGM); + // Note that AArch64 uses the generic ItaniumCXXABI class since it doesn't // include the other 32-bit ARM oddities: constructor/destructor return values // and array cookies. @@ -299,11 +371,9 @@ ItaniumCXXABI::ConvertMemberPointerType(const MemberPointerType *MPT) { /// /// If the member is non-virtual, memptr.ptr is the address of /// the function to call. -llvm::Value * -ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF, - llvm::Value *&This, - llvm::Value *MemFnPtr, - const MemberPointerType *MPT) { +llvm::Value *ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( + CodeGenFunction &CGF, const Expr *E, llvm::Value *&This, + llvm::Value *MemFnPtr, const MemberPointerType *MPT) { CGBuilderTy &Builder = CGF.Builder; const FunctionProtoType *FPT = @@ -355,8 +425,7 @@ ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF, // Cast the adjusted this to a pointer to vtable pointer and load. llvm::Type *VTableTy = Builder.getInt8PtrTy(); - llvm::Value *VTable = Builder.CreateBitCast(This, VTableTy->getPointerTo()); - VTable = Builder.CreateLoad(VTable, "memptr.vtable"); + llvm::Value *VTable = CGF.GetVTablePtr(This, VTableTy); // Apply the offset. llvm::Value *VTableOffset = FnAsInt; @@ -385,10 +454,9 @@ ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF, /// Compute an l-value by applying the given pointer-to-member to a /// base object. 
-llvm::Value *ItaniumCXXABI::EmitMemberDataPointerAddress(CodeGenFunction &CGF, - llvm::Value *Base, - llvm::Value *MemPtr, - const MemberPointerType *MPT) { +llvm::Value *ItaniumCXXABI::EmitMemberDataPointerAddress( + CodeGenFunction &CGF, const Expr *E, llvm::Value *Base, llvm::Value *MemPtr, + const MemberPointerType *MPT) { assert(MemPtr->getType() == CGM.PtrDiffTy); CGBuilderTy &Builder = CGF.Builder; @@ -748,6 +816,21 @@ ItaniumCXXABI::EmitMemberPointerIsNotNull(CodeGenFunction &CGF, return Result; } +bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const { + const CXXRecordDecl *RD = FI.getReturnType()->getAsCXXRecordDecl(); + if (!RD) + return false; + + // Return indirectly if we have a non-trivial copy ctor or non-trivial dtor. + // FIXME: Use canCopyArgument() when it is fixed to handle lazily declared + // special members. + if (RD->hasNonTrivialDestructor() || RD->hasNonTrivialCopyConstructor()) { + FI.getReturnInfo() = ABIArgInfo::getIndirect(0, /*ByVal=*/false); + return true; + } + return false; +} + /// The Itanium ABI requires non-zero initialization only for data /// member pointers, for which '0' is a valid offset. bool ItaniumCXXABI::isZeroInitializable(const MemberPointerType *MPT) { @@ -773,6 +856,194 @@ llvm::Value *ItaniumCXXABI::adjustToCompleteObject(CodeGenFunction &CGF, return CGF.Builder.CreateInBoundsGEP(ptr, offset); } +static llvm::Constant *getItaniumDynamicCastFn(CodeGenFunction &CGF) { + // void *__dynamic_cast(const void *sub, + // const abi::__class_type_info *src, + // const abi::__class_type_info *dst, + // std::ptrdiff_t src2dst_offset); + + llvm::Type *Int8PtrTy = CGF.Int8PtrTy; + llvm::Type *PtrDiffTy = + CGF.ConvertType(CGF.getContext().getPointerDiffType()); + + llvm::Type *Args[4] = { Int8PtrTy, Int8PtrTy, Int8PtrTy, PtrDiffTy }; + + llvm::FunctionType *FTy = llvm::FunctionType::get(Int8PtrTy, Args, false); + + // Mark the function as nounwind readonly. + llvm::Attribute::AttrKind FuncAttrs[] = { llvm::Attribute::NoUnwind, + llvm::Attribute::ReadOnly }; + llvm::AttributeSet Attrs = llvm::AttributeSet::get( + CGF.getLLVMContext(), llvm::AttributeSet::FunctionIndex, FuncAttrs); + + return CGF.CGM.CreateRuntimeFunction(FTy, "__dynamic_cast", Attrs); +} + +static llvm::Constant *getBadCastFn(CodeGenFunction &CGF) { + // void __cxa_bad_cast(); + llvm::FunctionType *FTy = llvm::FunctionType::get(CGF.VoidTy, false); + return CGF.CGM.CreateRuntimeFunction(FTy, "__cxa_bad_cast"); +} + +/// \brief Compute the src2dst_offset hint as described in the +/// Itanium C++ ABI [2.9.7] +static CharUnits computeOffsetHint(ASTContext &Context, + const CXXRecordDecl *Src, + const CXXRecordDecl *Dst) { + CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true, + /*DetectVirtual=*/false); + + // If Dst is not derived from Src we can skip the whole computation below and + // return that Src is not a public base of Dst. Record all inheritance paths. + if (!Dst->isDerivedFrom(Src, Paths)) + return CharUnits::fromQuantity(-2ULL); + + unsigned NumPublicPaths = 0; + CharUnits Offset; + + // Now walk all possible inheritance paths. + for (CXXBasePaths::paths_iterator I = Paths.begin(), E = Paths.end(); I != E; + ++I) { + if (I->Access != AS_public) // Ignore non-public inheritance. + continue; + + ++NumPublicPaths; + + for (CXXBasePath::iterator J = I->begin(), JE = I->end(); J != JE; ++J) { + // If the path contains a virtual base class we can't give any hint. + // -1: no hint. 
+ if (J->Base->isVirtual()) + return CharUnits::fromQuantity(-1ULL); + + if (NumPublicPaths > 1) // Won't use offsets, skip computation. + continue; + + // Accumulate the base class offsets. + const ASTRecordLayout &L = Context.getASTRecordLayout(J->Class); + Offset += L.getBaseClassOffset(J->Base->getType()->getAsCXXRecordDecl()); + } + } + + // -2: Src is not a public base of Dst. + if (NumPublicPaths == 0) + return CharUnits::fromQuantity(-2ULL); + + // -3: Src is a multiple public base type but never a virtual base type. + if (NumPublicPaths > 1) + return CharUnits::fromQuantity(-3ULL); + + // Otherwise, the Src type is a unique public nonvirtual base type of Dst. + // Return the offset of Src from the origin of Dst. + return Offset; +} + +static llvm::Constant *getBadTypeidFn(CodeGenFunction &CGF) { + // void __cxa_bad_typeid(); + llvm::FunctionType *FTy = llvm::FunctionType::get(CGF.VoidTy, false); + + return CGF.CGM.CreateRuntimeFunction(FTy, "__cxa_bad_typeid"); +} + +bool ItaniumCXXABI::shouldTypeidBeNullChecked(bool IsDeref, + QualType SrcRecordTy) { + return IsDeref; +} + +void ItaniumCXXABI::EmitBadTypeidCall(CodeGenFunction &CGF) { + llvm::Value *Fn = getBadTypeidFn(CGF); + CGF.EmitRuntimeCallOrInvoke(Fn).setDoesNotReturn(); + CGF.Builder.CreateUnreachable(); +} + +llvm::Value *ItaniumCXXABI::EmitTypeid(CodeGenFunction &CGF, + QualType SrcRecordTy, + llvm::Value *ThisPtr, + llvm::Type *StdTypeInfoPtrTy) { + llvm::Value *Value = + CGF.GetVTablePtr(ThisPtr, StdTypeInfoPtrTy->getPointerTo()); + + // Load the type info. + Value = CGF.Builder.CreateConstInBoundsGEP1_64(Value, -1ULL); + return CGF.Builder.CreateLoad(Value); +} + +bool ItaniumCXXABI::shouldDynamicCastCallBeNullChecked(bool SrcIsPtr, + QualType SrcRecordTy) { + return SrcIsPtr; +} + +llvm::Value *ItaniumCXXABI::EmitDynamicCastCall( + CodeGenFunction &CGF, llvm::Value *Value, QualType SrcRecordTy, + QualType DestTy, QualType DestRecordTy, llvm::BasicBlock *CastEnd) { + llvm::Type *PtrDiffLTy = + CGF.ConvertType(CGF.getContext().getPointerDiffType()); + llvm::Type *DestLTy = CGF.ConvertType(DestTy); + + llvm::Value *SrcRTTI = + CGF.CGM.GetAddrOfRTTIDescriptor(SrcRecordTy.getUnqualifiedType()); + llvm::Value *DestRTTI = + CGF.CGM.GetAddrOfRTTIDescriptor(DestRecordTy.getUnqualifiedType()); + + // Compute the offset hint. + const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl(); + const CXXRecordDecl *DestDecl = DestRecordTy->getAsCXXRecordDecl(); + llvm::Value *OffsetHint = llvm::ConstantInt::get( + PtrDiffLTy, + computeOffsetHint(CGF.getContext(), SrcDecl, DestDecl).getQuantity()); + + // Emit the call to __dynamic_cast. 
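As a quick illustration of the src2dst_offset hints computed above and passed to __dynamic_cast below, consider these hypothetical hierarchies; the expected values follow directly from the rules just cited.

struct A { virtual ~A() {} };
struct B : A {};          // dynamic_cast<B*>(A*): unique public non-virtual
                          // base at offset zero            -> hint = 0
struct C : A {};
struct D : B, C {};       // dynamic_cast<D*>(A*): two public paths to A,
                          // none through a virtual base    -> hint = -3
struct E : virtual A {};  // dynamic_cast<E*>(A*): the path contains a
                          // virtual base                   -> hint = -1
struct F : private A {};  // dynamic_cast<F*>(A*): A is not a public base
                          // of F                           -> hint = -2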
+ Value = CGF.EmitCastToVoidPtr(Value); + + llvm::Value *args[] = {Value, SrcRTTI, DestRTTI, OffsetHint}; + Value = CGF.EmitNounwindRuntimeCall(getItaniumDynamicCastFn(CGF), args); + Value = CGF.Builder.CreateBitCast(Value, DestLTy); + + /// C++ [expr.dynamic.cast]p9: + /// A failed cast to reference type throws std::bad_cast + if (DestTy->isReferenceType()) { + llvm::BasicBlock *BadCastBlock = + CGF.createBasicBlock("dynamic_cast.bad_cast"); + + llvm::Value *IsNull = CGF.Builder.CreateIsNull(Value); + CGF.Builder.CreateCondBr(IsNull, BadCastBlock, CastEnd); + + CGF.EmitBlock(BadCastBlock); + EmitBadCastCall(CGF); + } + + return Value; +} + +llvm::Value *ItaniumCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, + llvm::Value *Value, + QualType SrcRecordTy, + QualType DestTy) { + llvm::Type *PtrDiffLTy = + CGF.ConvertType(CGF.getContext().getPointerDiffType()); + llvm::Type *DestLTy = CGF.ConvertType(DestTy); + + // Get the vtable pointer. + llvm::Value *VTable = CGF.GetVTablePtr(Value, PtrDiffLTy->getPointerTo()); + + // Get the offset-to-top from the vtable. + llvm::Value *OffsetToTop = + CGF.Builder.CreateConstInBoundsGEP1_64(VTable, -2ULL); + OffsetToTop = CGF.Builder.CreateLoad(OffsetToTop, "offset.to.top"); + + // Finally, add the offset to the pointer. + Value = CGF.EmitCastToVoidPtr(Value); + Value = CGF.Builder.CreateInBoundsGEP(Value, OffsetToTop); + + return CGF.Builder.CreateBitCast(Value, DestLTy); +} + +bool ItaniumCXXABI::EmitBadCastCall(CodeGenFunction &CGF) { + llvm::Value *Fn = getBadCastFn(CGF); + CGF.EmitRuntimeCallOrInvoke(Fn).setDoesNotReturn(); + CGF.Builder.CreateUnreachable(); + return true; +} + llvm::Value * ItaniumCXXABI::GetVirtualBaseClassOffset(CodeGenFunction &CGF, llvm::Value *This, @@ -797,34 +1068,35 @@ ItaniumCXXABI::GetVirtualBaseClassOffset(CodeGenFunction &CGF, /// The generic ABI passes 'this', plus a VTT if it's initializing a /// base subobject. -void ItaniumCXXABI::BuildConstructorSignature(const CXXConstructorDecl *Ctor, - CXXCtorType Type, - CanQualType &ResTy, - SmallVectorImpl<CanQualType> &ArgTys) { +void +ItaniumCXXABI::BuildConstructorSignature(const CXXConstructorDecl *Ctor, + CXXCtorType Type, CanQualType &ResTy, + SmallVectorImpl<CanQualType> &ArgTys) { ASTContext &Context = getContext(); - // 'this' parameter is already there, as well as 'this' return if - // HasThisReturn(GlobalDecl(Ctor, Type)) is true + // All parameters are already in place except VTT, which goes after 'this'. + // These are Clang types, so we don't need to worry about sret yet. // Check if we need to add a VTT parameter (which has type void **). if (Type == Ctor_Base && Ctor->getParent()->getNumVBases() != 0) - ArgTys.push_back(Context.getPointerType(Context.VoidPtrTy)); + ArgTys.insert(ArgTys.begin() + 1, + Context.getPointerType(Context.VoidPtrTy)); } void ItaniumCXXABI::EmitCXXConstructors(const CXXConstructorDecl *D) { // Just make sure we're in sync with TargetCXXABI. assert(CGM.getTarget().getCXXABI().hasConstructorVariants()); + // The constructor used for constructing this as a base class; + // ignores virtual bases. + CGM.EmitGlobal(GlobalDecl(D, Ctor_Base)); + // The constructor used for constructing this as a complete class; // constucts the virtual bases, then calls the base constructor. if (!D->getParent()->isAbstract()) { // We don't need to emit the complete ctor if the class is abstract. CGM.EmitGlobal(GlobalDecl(D, Ctor_Complete)); } - - // The constructor used for constructing this as a base class; - // ignores virtual bases. 
- CGM.EmitGlobal(GlobalDecl(D, Ctor_Base)); } /// The generic ABI passes 'this', plus a VTT if it's destroying a @@ -844,29 +1116,26 @@ void ItaniumCXXABI::BuildDestructorSignature(const CXXDestructorDecl *Dtor, } void ItaniumCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) { - // The destructor in a virtual table is always a 'deleting' - // destructor, which calls the complete destructor and then uses the - // appropriate operator delete. - if (D->isVirtual()) - CGM.EmitGlobal(GlobalDecl(D, Dtor_Deleting)); + // The destructor used for destructing this as a base class; ignores + // virtual bases. + CGM.EmitGlobal(GlobalDecl(D, Dtor_Base)); // The destructor used for destructing this as a most-derived class; // call the base destructor and then destructs any virtual bases. CGM.EmitGlobal(GlobalDecl(D, Dtor_Complete)); - // The destructor used for destructing this as a base class; ignores - // virtual bases. - CGM.EmitGlobal(GlobalDecl(D, Dtor_Base)); + // The destructor in a virtual table is always a 'deleting' + // destructor, which calls the complete destructor and then uses the + // appropriate operator delete. + if (D->isVirtual()) + CGM.EmitGlobal(GlobalDecl(D, Dtor_Deleting)); } -void ItaniumCXXABI::BuildInstanceFunctionParams(CodeGenFunction &CGF, - QualType &ResTy, - FunctionArgList &Params) { - /// Create the 'this' variable. - BuildThisParam(CGF, Params); - +void ItaniumCXXABI::addImplicitStructorParams(CodeGenFunction &CGF, + QualType &ResTy, + FunctionArgList &Params) { const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl()); - assert(MD->isInstance()); + assert(isa<CXXConstructorDecl>(MD) || isa<CXXDestructorDecl>(MD)); // Check if we need a VTT parameter as well. if (NeedsVTTParameter(CGF.CurGD)) { @@ -875,10 +1144,10 @@ void ItaniumCXXABI::BuildInstanceFunctionParams(CodeGenFunction &CGF, // FIXME: avoid the fake decl QualType T = Context.getPointerType(Context.VoidPtrTy); ImplicitParamDecl *VTTDecl - = ImplicitParamDecl::Create(Context, 0, MD->getLocation(), + = ImplicitParamDecl::Create(Context, nullptr, MD->getLocation(), &Context.Idents.get("vtt"), T); - Params.push_back(VTTDecl); - getVTTDecl(CGF) = VTTDecl; + Params.insert(Params.begin() + 1, VTTDecl); + getStructorImplicitParamDecl(CGF) = VTTDecl; } } @@ -887,10 +1156,9 @@ void ItaniumCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) { EmitThisParam(CGF); /// Initialize the 'vtt' slot if needed. 
- if (getVTTDecl(CGF)) { - getVTTValue(CGF) - = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(getVTTDecl(CGF)), - "vtt"); + if (getStructorImplicitParamDecl(CGF)) { + getStructorImplicitParamValue(CGF) = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(getStructorImplicitParamDecl(CGF)), "vtt"); } /// If this is a function that the ABI specifies returns 'this', initialize @@ -905,21 +1173,39 @@ void ItaniumCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) { CGF.Builder.CreateStore(getThisValue(CGF), CGF.ReturnValue); } -void ItaniumCXXABI::EmitConstructorCall(CodeGenFunction &CGF, - const CXXConstructorDecl *D, - CXXCtorType Type, - bool ForVirtualBase, bool Delegating, - llvm::Value *This, - CallExpr::const_arg_iterator ArgBeg, - CallExpr::const_arg_iterator ArgEnd) { - llvm::Value *VTT = CGF.GetVTTParameter(GlobalDecl(D, Type), ForVirtualBase, - Delegating); +unsigned ItaniumCXXABI::addImplicitConstructorArgs( + CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type, + bool ForVirtualBase, bool Delegating, CallArgList &Args) { + if (!NeedsVTTParameter(GlobalDecl(D, Type))) + return 0; + + // Insert the implicit 'vtt' argument as the second argument. + llvm::Value *VTT = + CGF.GetVTTParameter(GlobalDecl(D, Type), ForVirtualBase, Delegating); QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy); - llvm::Value *Callee = CGM.GetAddrOfCXXConstructor(D, Type); + Args.insert(Args.begin() + 1, + CallArg(RValue::get(VTT), VTTTy, /*needscopy=*/false)); + return 1; // Added one arg. +} + +void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF, + const CXXDestructorDecl *DD, + CXXDtorType Type, bool ForVirtualBase, + bool Delegating, llvm::Value *This) { + GlobalDecl GD(DD, Type); + llvm::Value *VTT = CGF.GetVTTParameter(GD, ForVirtualBase, Delegating); + QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy); + + llvm::Value *Callee = nullptr; + if (getContext().getLangOpts().AppleKext) + Callee = CGF.BuildAppleKextVirtualDestructorCall(DD, Type, DD->getParent()); + + if (!Callee) + Callee = CGM.GetAddrOfCXXDestructor(DD, Type); // FIXME: Provide a source location here. - CGF.EmitCXXMemberCall(D, SourceLocation(), Callee, ReturnValueSlot(), - This, VTT, VTTTy, ArgBeg, ArgEnd); + CGF.EmitCXXMemberCall(DD, SourceLocation(), Callee, ReturnValueSlot(), This, + VTT, VTTTy, nullptr, nullptr); } void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, @@ -931,18 +1217,20 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, ItaniumVTableContext &VTContext = CGM.getItaniumVTableContext(); const VTableLayout &VTLayout = VTContext.getVTableLayout(RD); llvm::GlobalVariable::LinkageTypes Linkage = CGM.getVTableLinkage(RD); + llvm::Constant *RTTI = + CGM.GetAddrOfRTTIDescriptor(CGM.getContext().getTagDeclType(RD)); // Create and set the initializer. llvm::Constant *Init = CGVT.CreateVTableInitializer( RD, VTLayout.vtable_component_begin(), VTLayout.getNumVTableComponents(), - VTLayout.vtable_thunk_begin(), VTLayout.getNumVTableThunks()); + VTLayout.vtable_thunk_begin(), VTLayout.getNumVTableThunks(), RTTI); VTable->setInitializer(Init); // Set the correct linkage. VTable->setLinkage(Linkage); // Set the right visibility. - CGM.setTypeVisibility(VTable, RD, CodeGenModule::TVK_ForVTable); + CGM.setGlobalVisibility(VTable, RD); // If this is the magic class __cxxabiv1::__fundamental_type_info, // we will emit the typeinfo for the fundamental types. 
This is the @@ -953,7 +1241,7 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, isa<NamespaceDecl>(DC) && cast<NamespaceDecl>(DC)->getIdentifier() && cast<NamespaceDecl>(DC)->getIdentifier()->isStr("__cxxabiv1") && DC->getParent()->isTranslationUnit()) - CGM.EmitFundamentalRTTIDescriptors(); + EmitFundamentalRTTIDescriptors(); } llvm::Value *ItaniumCXXABI::getVTableAddressPointInStructor( @@ -1028,6 +1316,12 @@ llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, VTable = CGM.CreateOrReplaceCXXRuntimeVariable( Name, ArrayType, llvm::GlobalValue::ExternalLinkage); VTable->setUnnamedAddr(true); + + if (RD->hasAttr<DLLImportAttr>()) + VTable->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + else if (RD->hasAttr<DLLExportAttr>()) + VTable->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + return VTable; } @@ -1059,7 +1353,8 @@ void ItaniumCXXABI::EmitVirtualDestructorCall(CodeGenFunction &CGF, getVirtualFunctionPointer(CGF, GlobalDecl(Dtor, DtorType), This, Ty); CGF.EmitCXXMemberCall(Dtor, CallLoc, Callee, ReturnValueSlot(), This, - /*ImplicitParam=*/0, QualType(), 0, 0); + /*ImplicitParam=*/nullptr, QualType(), nullptr, + nullptr); } void ItaniumCXXABI::emitVirtualInheritanceTables(const CXXRecordDecl *RD) { @@ -1305,7 +1600,7 @@ namespace { llvm::GlobalVariable *Guard; CallGuardAbort(llvm::GlobalVariable *Guard) : Guard(Guard) {} - void Emit(CodeGenFunction &CGF, Flags flags) { + void Emit(CodeGenFunction &CGF, Flags flags) override { CGF.EmitNounwindRuntimeCall(getGuardAbortFn(CGF.CGM, Guard->getType()), Guard); } @@ -1365,25 +1660,7 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, } // Test whether the variable has completed initialization. - llvm::Value *isInitialized; - - // ARM C++ ABI 3.2.3.1: - // To support the potential use of initialization guard variables - // as semaphores that are the target of ARM SWP and LDREX/STREX - // synchronizing instructions we define a static initialization - // guard variable to be a 4-byte aligned, 4- byte word with the - // following inline access protocol. - // #define INITIALIZED 1 - // if ((obj_guard & INITIALIZED) != INITIALIZED) { - // if (__cxa_guard_acquire(&obj_guard)) - // ... - // } - if (UseARMGuardVarABI && !useInt8GuardVariable) { - llvm::Value *V = Builder.CreateLoad(guard); - llvm::Value *Test1 = llvm::ConstantInt::get(guardTy, 1); - V = Builder.CreateAnd(V, Test1); - isInitialized = Builder.CreateIsNull(V, "guard.uninitialized"); - + // // Itanium C++ ABI 3.3.2: // The following is pseudo-code showing how these functions can be used: // if (obj_guard.first_byte == 0) { @@ -1398,23 +1675,46 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, // __cxa_guard_release (&obj_guard); // } // } - } else { - // Load the first byte of the guard variable. - llvm::LoadInst *LI = + + // Load the first byte of the guard variable. + llvm::LoadInst *LI = Builder.CreateLoad(Builder.CreateBitCast(guard, CGM.Int8PtrTy)); - LI->setAlignment(1); - - // Itanium ABI: - // An implementation supporting thread-safety on multiprocessor - // systems must also guarantee that references to the initialized - // object do not occur before the load of the initialization flag. - // - // In LLVM, we do this by marking the load Acquire. 
- if (threadsafe) - LI->setAtomic(llvm::Acquire); - - isInitialized = Builder.CreateIsNull(LI, "guard.uninitialized"); - } + LI->setAlignment(1); + + // Itanium ABI: + // An implementation supporting thread-safety on multiprocessor + // systems must also guarantee that references to the initialized + // object do not occur before the load of the initialization flag. + // + // In LLVM, we do this by marking the load Acquire. + if (threadsafe) + LI->setAtomic(llvm::Acquire); + + // For ARM, we should only check the first bit, rather than the entire byte: + // + // ARM C++ ABI 3.2.3.1: + // To support the potential use of initialization guard variables + // as semaphores that are the target of ARM SWP and LDREX/STREX + // synchronizing instructions we define a static initialization + // guard variable to be a 4-byte aligned, 4-byte word with the + // following inline access protocol. + // #define INITIALIZED 1 + // if ((obj_guard & INITIALIZED) != INITIALIZED) { + // if (__cxa_guard_acquire(&obj_guard)) + // ... + // } + // + // and similarly for ARM64: + // + // ARM64 C++ ABI 3.2.2: + // This ABI instead only specifies the value bit 0 of the static guard + // variable; all other bits are platform defined. Bit 0 shall be 0 when the + // variable is not initialized and 1 when it is. + llvm::Value *V = + (UseARMGuardVarABI && !useInt8GuardVariable) + ? Builder.CreateAnd(LI, llvm::ConstantInt::get(CGM.Int8Ty, 1)) + : LI; + llvm::Value *isInitialized = Builder.CreateIsNull(V, "guard.uninitialized"); llvm::BasicBlock *InitCheckBlock = CGF.createBasicBlock("init.check"); llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end"); @@ -1518,16 +1818,34 @@ void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF, CGF.registerGlobalDtorWithAtExit(D, dtor, addr); } +static bool isThreadWrapperReplaceable(const VarDecl *VD, + CodeGen::CodeGenModule &CGM) { + assert(!VD->isStaticLocal() && "static local VarDecls don't need wrappers!"); + // OS X prefers to have references to thread local variables to go through + // the thread wrapper instead of directly referencing the backing variable. + return VD->getTLSKind() == VarDecl::TLS_Dynamic && + CGM.getTarget().getTriple().isMacOSX(); +} + /// Get the appropriate linkage for the wrapper function. This is essentially -/// the weak form of the variable's linkage; every translation unit which wneeds +/// the weak form of the variable's linkage; every translation unit which needs /// the wrapper emits a copy, and we want the linker to merge them. -static llvm::GlobalValue::LinkageTypes getThreadLocalWrapperLinkage( - llvm::GlobalValue::LinkageTypes VarLinkage) { - if (llvm::GlobalValue::isLinkerPrivateLinkage(VarLinkage)) - return llvm::GlobalValue::LinkerPrivateWeakLinkage; +static llvm::GlobalValue::LinkageTypes +getThreadLocalWrapperLinkage(const VarDecl *VD, CodeGen::CodeGenModule &CGM) { + llvm::GlobalValue::LinkageTypes VarLinkage = + CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false); + // For internal linkage variables, we don't need an external or weak wrapper. if (llvm::GlobalValue::isLocalLinkage(VarLinkage)) return VarLinkage; + + // If the thread wrapper is replaceable, give it appropriate linkage. 
+ if (isThreadWrapperReplaceable(VD, CGM)) { + if (llvm::GlobalVariable::isLinkOnceLinkage(VarLinkage) || + llvm::GlobalVariable::isWeakODRLinkage(VarLinkage)) + return llvm::GlobalVariable::WeakAnyLinkage; + return VarLinkage; + } return llvm::GlobalValue::WeakODRLinkage; } @@ -1550,21 +1868,28 @@ ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD, RetTy = RetTy->getPointerElementType(); llvm::FunctionType *FnTy = llvm::FunctionType::get(RetTy, false); - llvm::Function *Wrapper = llvm::Function::Create( - FnTy, getThreadLocalWrapperLinkage(Var->getLinkage()), WrapperName.str(), - &CGM.getModule()); + llvm::Function *Wrapper = + llvm::Function::Create(FnTy, getThreadLocalWrapperLinkage(VD, CGM), + WrapperName.str(), &CGM.getModule()); // Always resolve references to the wrapper at link time. - Wrapper->setVisibility(llvm::GlobalValue::HiddenVisibility); + if (!Wrapper->hasLocalLinkage() && !isThreadWrapperReplaceable(VD, CGM)) + Wrapper->setVisibility(llvm::GlobalValue::HiddenVisibility); return Wrapper; } void ItaniumCXXABI::EmitThreadLocalInitFuncs( - llvm::ArrayRef<std::pair<const VarDecl *, llvm::GlobalVariable *> > Decls, + ArrayRef<std::pair<const VarDecl *, llvm::GlobalVariable *> > Decls, llvm::Function *InitFunc) { for (unsigned I = 0, N = Decls.size(); I != N; ++I) { const VarDecl *VD = Decls[I].first; llvm::GlobalVariable *Var = Decls[I].second; + // Some targets require that all access to thread local variables go through + // the thread wrapper. This means that we cannot attempt to create a thread + // wrapper or a thread helper. + if (isThreadWrapperReplaceable(VD, CGM) && !VD->hasDefinition()) + continue; + // Mangle the name for the thread_local initialization function. SmallString<256> InitFnName; { @@ -1576,14 +1901,13 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( // If we have a definition for the variable, emit the initialization // function as an alias to the global Init function (if any). Otherwise, // produce a declaration of the initialization function. - llvm::GlobalValue *Init = 0; + llvm::GlobalValue *Init = nullptr; bool InitIsInitFunc = false; if (VD->hasDefinition()) { InitIsInitFunc = true; if (InitFunc) - Init = - new llvm::GlobalAlias(InitFunc->getType(), Var->getLinkage(), - InitFnName.str(), InitFunc, &CGM.getModule()); + Init = llvm::GlobalAlias::create(Var->getLinkage(), InitFnName.str(), + InitFunc); } else { // Emit a weak global function referring to the initialization function. // This function will not exist if the TU defining the thread_local @@ -1632,9 +1956,9 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( } } -LValue ItaniumCXXABI::EmitThreadLocalDeclRefExpr(CodeGenFunction &CGF, - const DeclRefExpr *DRE) { - const VarDecl *VD = cast<VarDecl>(DRE->getDecl()); +LValue ItaniumCXXABI::EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, + const VarDecl *VD, + QualType LValType) { QualType T = VD->getType(); llvm::Type *Ty = CGF.getTypes().ConvertTypeForMem(T); llvm::Value *Val = CGF.CGM.GetAddrOfGlobalVar(VD, Ty); @@ -1645,10 +1969,9 @@ LValue ItaniumCXXABI::EmitThreadLocalDeclRefExpr(CodeGenFunction &CGF, LValue LV; if (VD->getType()->isReferenceType()) - LV = CGF.MakeNaturalAlignAddrLValue(Val, T); + LV = CGF.MakeNaturalAlignAddrLValue(Val, LValType); else - LV = CGF.MakeAddrLValue(Val, DRE->getType(), - CGF.getContext().getDeclAlign(VD)); + LV = CGF.MakeAddrLValue(Val, LValType, CGF.getContext().getDeclAlign(VD)); // FIXME: need setObjCGCLValueClass? 
return LV; } @@ -1672,3 +1995,996 @@ bool ItaniumCXXABI::NeedsVTTParameter(GlobalDecl GD) { return false; } + +namespace { +class ItaniumRTTIBuilder { + CodeGenModule &CGM; // Per-module state. + llvm::LLVMContext &VMContext; + const ItaniumCXXABI &CXXABI; // Per-module state. + + /// Fields - The fields of the RTTI descriptor currently being built. + SmallVector<llvm::Constant *, 16> Fields; + + /// GetAddrOfTypeName - Returns the mangled type name of the given type. + llvm::GlobalVariable * + GetAddrOfTypeName(QualType Ty, llvm::GlobalVariable::LinkageTypes Linkage); + + /// GetAddrOfExternalRTTIDescriptor - Returns the constant for the RTTI + /// descriptor of the given type. + llvm::Constant *GetAddrOfExternalRTTIDescriptor(QualType Ty); + + /// BuildVTablePointer - Build the vtable pointer for the given type. + void BuildVTablePointer(const Type *Ty); + + /// BuildSIClassTypeInfo - Build an abi::__si_class_type_info, used for single + /// inheritance, according to the Itanium C++ ABI, 2.9.5p6b. + void BuildSIClassTypeInfo(const CXXRecordDecl *RD); + + /// BuildVMIClassTypeInfo - Build an abi::__vmi_class_type_info, used for + /// classes with bases that do not satisfy the abi::__si_class_type_info + /// constraints, according ti the Itanium C++ ABI, 2.9.5p5c. + void BuildVMIClassTypeInfo(const CXXRecordDecl *RD); + + /// BuildPointerTypeInfo - Build an abi::__pointer_type_info struct, used + /// for pointer types. + void BuildPointerTypeInfo(QualType PointeeTy); + + /// BuildObjCObjectTypeInfo - Build the appropriate kind of + /// type_info for an object type. + void BuildObjCObjectTypeInfo(const ObjCObjectType *Ty); + + /// BuildPointerToMemberTypeInfo - Build an abi::__pointer_to_member_type_info + /// struct, used for member pointer types. + void BuildPointerToMemberTypeInfo(const MemberPointerType *Ty); + +public: + ItaniumRTTIBuilder(const ItaniumCXXABI &ABI) + : CGM(ABI.CGM), VMContext(CGM.getModule().getContext()), CXXABI(ABI) {} + + // Pointer type info flags. + enum { + /// PTI_Const - Type has const qualifier. + PTI_Const = 0x1, + + /// PTI_Volatile - Type has volatile qualifier. + PTI_Volatile = 0x2, + + /// PTI_Restrict - Type has restrict qualifier. + PTI_Restrict = 0x4, + + /// PTI_Incomplete - Type is incomplete. + PTI_Incomplete = 0x8, + + /// PTI_ContainingClassIncomplete - Containing class is incomplete. + /// (in pointer to member). + PTI_ContainingClassIncomplete = 0x10 + }; + + // VMI type info flags. + enum { + /// VMI_NonDiamondRepeat - Class has non-diamond repeated inheritance. + VMI_NonDiamondRepeat = 0x1, + + /// VMI_DiamondShaped - Class is diamond shaped. + VMI_DiamondShaped = 0x2 + }; + + // Base class type info flags. + enum { + /// BCTI_Virtual - Base class is virtual. + BCTI_Virtual = 0x1, + + /// BCTI_Public - Base class is public. + BCTI_Public = 0x2 + }; + + /// BuildTypeInfo - Build the RTTI type info struct for the given type. 
+ /// + /// \param Force - true to force the creation of this RTTI value + llvm::Constant *BuildTypeInfo(QualType Ty, bool Force = false); +}; +} + +llvm::GlobalVariable *ItaniumRTTIBuilder::GetAddrOfTypeName( + QualType Ty, llvm::GlobalVariable::LinkageTypes Linkage) { + SmallString<256> OutName; + llvm::raw_svector_ostream Out(OutName); + CGM.getCXXABI().getMangleContext().mangleCXXRTTIName(Ty, Out); + Out.flush(); + StringRef Name = OutName.str(); + + // We know that the mangled name of the type starts at index 4 of the + // mangled name of the typename, so we can just index into it in order to + // get the mangled name of the type. + llvm::Constant *Init = llvm::ConstantDataArray::getString(VMContext, + Name.substr(4)); + + llvm::GlobalVariable *GV = + CGM.CreateOrReplaceCXXRuntimeVariable(Name, Init->getType(), Linkage); + + GV->setInitializer(Init); + + return GV; +} + +llvm::Constant * +ItaniumRTTIBuilder::GetAddrOfExternalRTTIDescriptor(QualType Ty) { + // Mangle the RTTI name. + SmallString<256> OutName; + llvm::raw_svector_ostream Out(OutName); + CGM.getCXXABI().getMangleContext().mangleCXXRTTI(Ty, Out); + Out.flush(); + StringRef Name = OutName.str(); + + // Look for an existing global. + llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(Name); + + if (!GV) { + // Create a new global variable. + GV = new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy, + /*Constant=*/true, + llvm::GlobalValue::ExternalLinkage, nullptr, + Name); + } + + return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy); +} + +/// TypeInfoIsInStandardLibrary - Given a builtin type, returns whether the type +/// info for that type is defined in the standard library. +static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { + // Itanium C++ ABI 2.9.2: + // Basic type information (e.g. for "int", "bool", etc.) will be kept in + // the run-time support library. Specifically, the run-time support + // library should contain type_info objects for the types X, X* and + // X const*, for every X in: void, std::nullptr_t, bool, wchar_t, char, + // unsigned char, signed char, short, unsigned short, int, unsigned int, + // long, unsigned long, long long, unsigned long long, float, double, + // long double, char16_t, char32_t, and the IEEE 754r decimal and + // half-precision floating point types. 
+ switch (Ty->getKind()) { + case BuiltinType::Void: + case BuiltinType::NullPtr: + case BuiltinType::Bool: + case BuiltinType::WChar_S: + case BuiltinType::WChar_U: + case BuiltinType::Char_U: + case BuiltinType::Char_S: + case BuiltinType::UChar: + case BuiltinType::SChar: + case BuiltinType::Short: + case BuiltinType::UShort: + case BuiltinType::Int: + case BuiltinType::UInt: + case BuiltinType::Long: + case BuiltinType::ULong: + case BuiltinType::LongLong: + case BuiltinType::ULongLong: + case BuiltinType::Half: + case BuiltinType::Float: + case BuiltinType::Double: + case BuiltinType::LongDouble: + case BuiltinType::Char16: + case BuiltinType::Char32: + case BuiltinType::Int128: + case BuiltinType::UInt128: + case BuiltinType::OCLImage1d: + case BuiltinType::OCLImage1dArray: + case BuiltinType::OCLImage1dBuffer: + case BuiltinType::OCLImage2d: + case BuiltinType::OCLImage2dArray: + case BuiltinType::OCLImage3d: + case BuiltinType::OCLSampler: + case BuiltinType::OCLEvent: + return true; + + case BuiltinType::Dependent: +#define BUILTIN_TYPE(Id, SingletonId) +#define PLACEHOLDER_TYPE(Id, SingletonId) \ + case BuiltinType::Id: +#include "clang/AST/BuiltinTypes.def" + llvm_unreachable("asking for RRTI for a placeholder type!"); + + case BuiltinType::ObjCId: + case BuiltinType::ObjCClass: + case BuiltinType::ObjCSel: + llvm_unreachable("FIXME: Objective-C types are unsupported!"); + } + + llvm_unreachable("Invalid BuiltinType Kind!"); +} + +static bool TypeInfoIsInStandardLibrary(const PointerType *PointerTy) { + QualType PointeeTy = PointerTy->getPointeeType(); + const BuiltinType *BuiltinTy = dyn_cast<BuiltinType>(PointeeTy); + if (!BuiltinTy) + return false; + + // Check the qualifiers. + Qualifiers Quals = PointeeTy.getQualifiers(); + Quals.removeConst(); + + if (!Quals.empty()) + return false; + + return TypeInfoIsInStandardLibrary(BuiltinTy); +} + +/// IsStandardLibraryRTTIDescriptor - Returns whether the type +/// information for the given type exists in the standard library. +static bool IsStandardLibraryRTTIDescriptor(QualType Ty) { + // Type info for builtin types is defined in the standard library. + if (const BuiltinType *BuiltinTy = dyn_cast<BuiltinType>(Ty)) + return TypeInfoIsInStandardLibrary(BuiltinTy); + + // Type info for some pointer types to builtin types is defined in the + // standard library. + if (const PointerType *PointerTy = dyn_cast<PointerType>(Ty)) + return TypeInfoIsInStandardLibrary(PointerTy); + + return false; +} + +/// ShouldUseExternalRTTIDescriptor - Returns whether the type information for +/// the given type exists somewhere else, and that we should not emit the type +/// information in this translation unit. Assumes that it is not a +/// standard-library type. +static bool ShouldUseExternalRTTIDescriptor(CodeGenModule &CGM, + QualType Ty) { + ASTContext &Context = CGM.getContext(); + + // If RTTI is disabled, assume it might be disabled in the + // translation unit that defines any potential key function, too. + if (!Context.getLangOpts().RTTI) return false; + + if (const RecordType *RecordTy = dyn_cast<RecordType>(Ty)) { + const CXXRecordDecl *RD = cast<CXXRecordDecl>(RecordTy->getDecl()); + if (!RD->hasDefinition()) + return false; + + if (!RD->isDynamicClass()) + return false; + + // FIXME: this may need to be reconsidered if the key function + // changes. + return CGM.getVTables().isVTableExternal(RD); + } + + return false; +} + +/// IsIncompleteClassType - Returns whether the given record type is incomplete. 
+static bool IsIncompleteClassType(const RecordType *RecordTy) { + return !RecordTy->getDecl()->isCompleteDefinition(); +} + +/// ContainsIncompleteClassType - Returns whether the given type contains an +/// incomplete class type. This is true if +/// +/// * The given type is an incomplete class type. +/// * The given type is a pointer type whose pointee type contains an +/// incomplete class type. +/// * The given type is a member pointer type whose class is an incomplete +/// class type. +/// * The given type is a member pointer type whoise pointee type contains an +/// incomplete class type. +/// is an indirect or direct pointer to an incomplete class type. +static bool ContainsIncompleteClassType(QualType Ty) { + if (const RecordType *RecordTy = dyn_cast<RecordType>(Ty)) { + if (IsIncompleteClassType(RecordTy)) + return true; + } + + if (const PointerType *PointerTy = dyn_cast<PointerType>(Ty)) + return ContainsIncompleteClassType(PointerTy->getPointeeType()); + + if (const MemberPointerType *MemberPointerTy = + dyn_cast<MemberPointerType>(Ty)) { + // Check if the class type is incomplete. + const RecordType *ClassType = cast<RecordType>(MemberPointerTy->getClass()); + if (IsIncompleteClassType(ClassType)) + return true; + + return ContainsIncompleteClassType(MemberPointerTy->getPointeeType()); + } + + return false; +} + +// CanUseSingleInheritance - Return whether the given record decl has a "single, +// public, non-virtual base at offset zero (i.e. the derived class is dynamic +// iff the base is)", according to Itanium C++ ABI, 2.95p6b. +static bool CanUseSingleInheritance(const CXXRecordDecl *RD) { + // Check the number of bases. + if (RD->getNumBases() != 1) + return false; + + // Get the base. + CXXRecordDecl::base_class_const_iterator Base = RD->bases_begin(); + + // Check that the base is not virtual. + if (Base->isVirtual()) + return false; + + // Check that the base is public. + if (Base->getAccessSpecifier() != AS_public) + return false; + + // Check that the class is dynamic iff the base is. + const CXXRecordDecl *BaseDecl = + cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl()); + if (!BaseDecl->isEmpty() && + BaseDecl->isDynamicClass() != RD->isDynamicClass()) + return false; + + return true; +} + +void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { + // abi::__class_type_info. + static const char * const ClassTypeInfo = + "_ZTVN10__cxxabiv117__class_type_infoE"; + // abi::__si_class_type_info. + static const char * const SIClassTypeInfo = + "_ZTVN10__cxxabiv120__si_class_type_infoE"; + // abi::__vmi_class_type_info. + static const char * const VMIClassTypeInfo = + "_ZTVN10__cxxabiv121__vmi_class_type_infoE"; + + const char *VTableName = nullptr; + + switch (Ty->getTypeClass()) { +#define TYPE(Class, Base) +#define ABSTRACT_TYPE(Class, Base) +#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class: +#define NON_CANONICAL_TYPE(Class, Base) case Type::Class: +#define DEPENDENT_TYPE(Class, Base) case Type::Class: +#include "clang/AST/TypeNodes.def" + llvm_unreachable("Non-canonical and dependent types shouldn't get here"); + + case Type::LValueReference: + case Type::RValueReference: + llvm_unreachable("References shouldn't get here"); + + case Type::Auto: + llvm_unreachable("Undeduced auto type shouldn't get here"); + + case Type::Builtin: + // GCC treats vector and complex types as fundamental types. 
+ case Type::Vector: + case Type::ExtVector: + case Type::Complex: + case Type::Atomic: + // FIXME: GCC treats block pointers as fundamental types?! + case Type::BlockPointer: + // abi::__fundamental_type_info. + VTableName = "_ZTVN10__cxxabiv123__fundamental_type_infoE"; + break; + + case Type::ConstantArray: + case Type::IncompleteArray: + case Type::VariableArray: + // abi::__array_type_info. + VTableName = "_ZTVN10__cxxabiv117__array_type_infoE"; + break; + + case Type::FunctionNoProto: + case Type::FunctionProto: + // abi::__function_type_info. + VTableName = "_ZTVN10__cxxabiv120__function_type_infoE"; + break; + + case Type::Enum: + // abi::__enum_type_info. + VTableName = "_ZTVN10__cxxabiv116__enum_type_infoE"; + break; + + case Type::Record: { + const CXXRecordDecl *RD = + cast<CXXRecordDecl>(cast<RecordType>(Ty)->getDecl()); + + if (!RD->hasDefinition() || !RD->getNumBases()) { + VTableName = ClassTypeInfo; + } else if (CanUseSingleInheritance(RD)) { + VTableName = SIClassTypeInfo; + } else { + VTableName = VMIClassTypeInfo; + } + + break; + } + + case Type::ObjCObject: + // Ignore protocol qualifiers. + Ty = cast<ObjCObjectType>(Ty)->getBaseType().getTypePtr(); + + // Handle id and Class. + if (isa<BuiltinType>(Ty)) { + VTableName = ClassTypeInfo; + break; + } + + assert(isa<ObjCInterfaceType>(Ty)); + // Fall through. + + case Type::ObjCInterface: + if (cast<ObjCInterfaceType>(Ty)->getDecl()->getSuperClass()) { + VTableName = SIClassTypeInfo; + } else { + VTableName = ClassTypeInfo; + } + break; + + case Type::ObjCObjectPointer: + case Type::Pointer: + // abi::__pointer_type_info. + VTableName = "_ZTVN10__cxxabiv119__pointer_type_infoE"; + break; + + case Type::MemberPointer: + // abi::__pointer_to_member_type_info. + VTableName = "_ZTVN10__cxxabiv129__pointer_to_member_type_infoE"; + break; + } + + llvm::Constant *VTable = + CGM.getModule().getOrInsertGlobal(VTableName, CGM.Int8PtrTy); + + llvm::Type *PtrDiffTy = + CGM.getTypes().ConvertType(CGM.getContext().getPointerDiffType()); + + // The vtable address point is 2. + llvm::Constant *Two = llvm::ConstantInt::get(PtrDiffTy, 2); + VTable = llvm::ConstantExpr::getInBoundsGetElementPtr(VTable, Two); + VTable = llvm::ConstantExpr::getBitCast(VTable, CGM.Int8PtrTy); + + Fields.push_back(VTable); +} + +/// \brief Return the linkage that the type info and type info name constants +/// should have for the given type. +static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM, + QualType Ty) { + // Itanium C++ ABI 2.9.5p7: + // In addition, it and all of the intermediate abi::__pointer_type_info + // structs in the chain down to the abi::__class_type_info for the + // incomplete class type must be prevented from resolving to the + // corresponding type_info structs for the complete class type, possibly + // by making them local static objects. Finally, a dummy class RTTI is + // generated for the incomplete type that will not resolve to the final + // complete class RTTI (because the latter need not exist), possibly by + // making it a local static object. + if (ContainsIncompleteClassType(Ty)) + return llvm::GlobalValue::InternalLinkage; + + switch (Ty->getLinkage()) { + case NoLinkage: + case InternalLinkage: + case UniqueExternalLinkage: + return llvm::GlobalValue::InternalLinkage; + + case VisibleNoLinkage: + case ExternalLinkage: + if (!CGM.getLangOpts().RTTI) { + // RTTI is not enabled, which means that this type info struct is going + // to be used for exception handling. Give it linkonce_odr linkage. 
+ return llvm::GlobalValue::LinkOnceODRLinkage; + } + + if (const RecordType *Record = dyn_cast<RecordType>(Ty)) { + const CXXRecordDecl *RD = cast<CXXRecordDecl>(Record->getDecl()); + if (RD->hasAttr<WeakAttr>()) + return llvm::GlobalValue::WeakODRLinkage; + if (RD->isDynamicClass()) + return CGM.getVTableLinkage(RD); + } + + return llvm::GlobalValue::LinkOnceODRLinkage; + } + + llvm_unreachable("Invalid linkage!"); +} + +llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force) { + // We want to operate on the canonical type. + Ty = CGM.getContext().getCanonicalType(Ty); + + // Check if we've already emitted an RTTI descriptor for this type. + SmallString<256> OutName; + llvm::raw_svector_ostream Out(OutName); + CGM.getCXXABI().getMangleContext().mangleCXXRTTI(Ty, Out); + Out.flush(); + StringRef Name = OutName.str(); + + llvm::GlobalVariable *OldGV = CGM.getModule().getNamedGlobal(Name); + if (OldGV && !OldGV->isDeclaration()) { + assert(!OldGV->hasAvailableExternallyLinkage() && + "available_externally typeinfos not yet implemented"); + + return llvm::ConstantExpr::getBitCast(OldGV, CGM.Int8PtrTy); + } + + // Check if there is already an external RTTI descriptor for this type. + bool IsStdLib = IsStandardLibraryRTTIDescriptor(Ty); + if (!Force && (IsStdLib || ShouldUseExternalRTTIDescriptor(CGM, Ty))) + return GetAddrOfExternalRTTIDescriptor(Ty); + + // Emit the standard library with external linkage. + llvm::GlobalVariable::LinkageTypes Linkage; + if (IsStdLib) + Linkage = llvm::GlobalValue::ExternalLinkage; + else + Linkage = getTypeInfoLinkage(CGM, Ty); + + // Add the vtable pointer. + BuildVTablePointer(cast<Type>(Ty)); + + // And the name. + llvm::GlobalVariable *TypeName = GetAddrOfTypeName(Ty, Linkage); + llvm::Constant *TypeNameField; + + // If we're supposed to demote the visibility, be sure to set a flag + // to use a string comparison for type_info comparisons. + ItaniumCXXABI::RTTIUniquenessKind RTTIUniqueness = + CXXABI.classifyRTTIUniqueness(Ty, Linkage); + if (RTTIUniqueness != ItaniumCXXABI::RUK_Unique) { + // The flag is the sign bit, which on ARM64 is defined to be clear + // for global pointers. This is very ARM64-specific. + TypeNameField = llvm::ConstantExpr::getPtrToInt(TypeName, CGM.Int64Ty); + llvm::Constant *flag = + llvm::ConstantInt::get(CGM.Int64Ty, ((uint64_t)1) << 63); + TypeNameField = llvm::ConstantExpr::getAdd(TypeNameField, flag); + TypeNameField = + llvm::ConstantExpr::getIntToPtr(TypeNameField, CGM.Int8PtrTy); + } else { + TypeNameField = llvm::ConstantExpr::getBitCast(TypeName, CGM.Int8PtrTy); + } + Fields.push_back(TypeNameField); + + switch (Ty->getTypeClass()) { +#define TYPE(Class, Base) +#define ABSTRACT_TYPE(Class, Base) +#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class: +#define NON_CANONICAL_TYPE(Class, Base) case Type::Class: +#define DEPENDENT_TYPE(Class, Base) case Type::Class: +#include "clang/AST/TypeNodes.def" + llvm_unreachable("Non-canonical and dependent types shouldn't get here"); + + // GCC treats vector types as fundamental types. + case Type::Builtin: + case Type::Vector: + case Type::ExtVector: + case Type::Complex: + case Type::BlockPointer: + // Itanium C++ ABI 2.9.5p4: + // abi::__fundamental_type_info adds no data members to std::type_info. 
+ break; + + case Type::LValueReference: + case Type::RValueReference: + llvm_unreachable("References shouldn't get here"); + + case Type::Auto: + llvm_unreachable("Undeduced auto type shouldn't get here"); + + case Type::ConstantArray: + case Type::IncompleteArray: + case Type::VariableArray: + // Itanium C++ ABI 2.9.5p5: + // abi::__array_type_info adds no data members to std::type_info. + break; + + case Type::FunctionNoProto: + case Type::FunctionProto: + // Itanium C++ ABI 2.9.5p5: + // abi::__function_type_info adds no data members to std::type_info. + break; + + case Type::Enum: + // Itanium C++ ABI 2.9.5p5: + // abi::__enum_type_info adds no data members to std::type_info. + break; + + case Type::Record: { + const CXXRecordDecl *RD = + cast<CXXRecordDecl>(cast<RecordType>(Ty)->getDecl()); + if (!RD->hasDefinition() || !RD->getNumBases()) { + // We don't need to emit any fields. + break; + } + + if (CanUseSingleInheritance(RD)) + BuildSIClassTypeInfo(RD); + else + BuildVMIClassTypeInfo(RD); + + break; + } + + case Type::ObjCObject: + case Type::ObjCInterface: + BuildObjCObjectTypeInfo(cast<ObjCObjectType>(Ty)); + break; + + case Type::ObjCObjectPointer: + BuildPointerTypeInfo(cast<ObjCObjectPointerType>(Ty)->getPointeeType()); + break; + + case Type::Pointer: + BuildPointerTypeInfo(cast<PointerType>(Ty)->getPointeeType()); + break; + + case Type::MemberPointer: + BuildPointerToMemberTypeInfo(cast<MemberPointerType>(Ty)); + break; + + case Type::Atomic: + // No fields, at least for the moment. + break; + } + + llvm::Constant *Init = llvm::ConstantStruct::getAnon(Fields); + + llvm::GlobalVariable *GV = + new llvm::GlobalVariable(CGM.getModule(), Init->getType(), + /*Constant=*/true, Linkage, Init, Name); + + // If there's already an old global variable, replace it with the new one. + if (OldGV) { + GV->takeName(OldGV); + llvm::Constant *NewPtr = + llvm::ConstantExpr::getBitCast(GV, OldGV->getType()); + OldGV->replaceAllUsesWith(NewPtr); + OldGV->eraseFromParent(); + } + + // The Itanium ABI specifies that type_info objects must be globally + // unique, with one exception: if the type is an incomplete class + // type or a (possibly indirect) pointer to one. That exception + // affects the general case of comparing type_info objects produced + // by the typeid operator, which is why the comparison operators on + // std::type_info generally use the type_info name pointers instead + // of the object addresses. However, the language's built-in uses + // of RTTI generally require class types to be complete, even when + // manipulating pointers to those class types. This allows the + // implementation of dynamic_cast to rely on address equality tests, + // which is much faster. + + // All of this is to say that it's important that both the type_info + // object and the type_info name be uniqued when weakly emitted. + + // Give the type_info object and name the formal visibility of the + // type itself. + llvm::GlobalValue::VisibilityTypes llvmVisibility; + if (llvm::GlobalValue::isLocalLinkage(Linkage)) + // If the linkage is local, only default visibility makes sense. 
+ llvmVisibility = llvm::GlobalValue::DefaultVisibility; + else if (RTTIUniqueness == ItaniumCXXABI::RUK_NonUniqueHidden) + llvmVisibility = llvm::GlobalValue::HiddenVisibility; + else + llvmVisibility = CodeGenModule::GetLLVMVisibility(Ty->getVisibility()); + TypeName->setVisibility(llvmVisibility); + GV->setVisibility(llvmVisibility); + + return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy); +} + +/// ComputeQualifierFlags - Compute the pointer type info flags from the +/// given qualifier. +static unsigned ComputeQualifierFlags(Qualifiers Quals) { + unsigned Flags = 0; + + if (Quals.hasConst()) + Flags |= ItaniumRTTIBuilder::PTI_Const; + if (Quals.hasVolatile()) + Flags |= ItaniumRTTIBuilder::PTI_Volatile; + if (Quals.hasRestrict()) + Flags |= ItaniumRTTIBuilder::PTI_Restrict; + + return Flags; +} + +/// BuildObjCObjectTypeInfo - Build the appropriate kind of type_info +/// for the given Objective-C object type. +void ItaniumRTTIBuilder::BuildObjCObjectTypeInfo(const ObjCObjectType *OT) { + // Drop qualifiers. + const Type *T = OT->getBaseType().getTypePtr(); + assert(isa<BuiltinType>(T) || isa<ObjCInterfaceType>(T)); + + // The builtin types are abi::__class_type_infos and don't require + // extra fields. + if (isa<BuiltinType>(T)) return; + + ObjCInterfaceDecl *Class = cast<ObjCInterfaceType>(T)->getDecl(); + ObjCInterfaceDecl *Super = Class->getSuperClass(); + + // Root classes are also __class_type_info. + if (!Super) return; + + QualType SuperTy = CGM.getContext().getObjCInterfaceType(Super); + + // Everything else is single inheritance. + llvm::Constant *BaseTypeInfo = + ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(SuperTy); + Fields.push_back(BaseTypeInfo); +} + +/// BuildSIClassTypeInfo - Build an abi::__si_class_type_info, used for single +/// inheritance, according to the Itanium C++ ABI, 2.95p6b. +void ItaniumRTTIBuilder::BuildSIClassTypeInfo(const CXXRecordDecl *RD) { + // Itanium C++ ABI 2.9.5p6b: + // It adds to abi::__class_type_info a single member pointing to the + // type_info structure for the base type, + llvm::Constant *BaseTypeInfo = + ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(RD->bases_begin()->getType()); + Fields.push_back(BaseTypeInfo); +} + +namespace { + /// SeenBases - Contains virtual and non-virtual bases seen when traversing + /// a class hierarchy. + struct SeenBases { + llvm::SmallPtrSet<const CXXRecordDecl *, 16> NonVirtualBases; + llvm::SmallPtrSet<const CXXRecordDecl *, 16> VirtualBases; + }; +} + +/// ComputeVMIClassTypeInfoFlags - Compute the value of the flags member in +/// abi::__vmi_class_type_info. +/// +static unsigned ComputeVMIClassTypeInfoFlags(const CXXBaseSpecifier *Base, + SeenBases &Bases) { + + unsigned Flags = 0; + + const CXXRecordDecl *BaseDecl = + cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl()); + + if (Base->isVirtual()) { + // Mark the virtual base as seen. + if (!Bases.VirtualBases.insert(BaseDecl)) { + // If this virtual base has been seen before, then the class is diamond + // shaped. + Flags |= ItaniumRTTIBuilder::VMI_DiamondShaped; + } else { + if (Bases.NonVirtualBases.count(BaseDecl)) + Flags |= ItaniumRTTIBuilder::VMI_NonDiamondRepeat; + } + } else { + // Mark the non-virtual base as seen. + if (!Bases.NonVirtualBases.insert(BaseDecl)) { + // If this non-virtual base has been seen before, then the class has non- + // diamond shaped repeated inheritance. 
+ Flags |= ItaniumRTTIBuilder::VMI_NonDiamondRepeat; + } else { + if (Bases.VirtualBases.count(BaseDecl)) + Flags |= ItaniumRTTIBuilder::VMI_NonDiamondRepeat; + } + } + + // Walk all bases. + for (const auto &I : BaseDecl->bases()) + Flags |= ComputeVMIClassTypeInfoFlags(&I, Bases); + + return Flags; +} + +static unsigned ComputeVMIClassTypeInfoFlags(const CXXRecordDecl *RD) { + unsigned Flags = 0; + SeenBases Bases; + + // Walk all bases. + for (const auto &I : RD->bases()) + Flags |= ComputeVMIClassTypeInfoFlags(&I, Bases); + + return Flags; +} + +/// BuildVMIClassTypeInfo - Build an abi::__vmi_class_type_info, used for +/// classes with bases that do not satisfy the abi::__si_class_type_info +/// constraints, according ti the Itanium C++ ABI, 2.9.5p5c. +void ItaniumRTTIBuilder::BuildVMIClassTypeInfo(const CXXRecordDecl *RD) { + llvm::Type *UnsignedIntLTy = + CGM.getTypes().ConvertType(CGM.getContext().UnsignedIntTy); + + // Itanium C++ ABI 2.9.5p6c: + // __flags is a word with flags describing details about the class + // structure, which may be referenced by using the __flags_masks + // enumeration. These flags refer to both direct and indirect bases. + unsigned Flags = ComputeVMIClassTypeInfoFlags(RD); + Fields.push_back(llvm::ConstantInt::get(UnsignedIntLTy, Flags)); + + // Itanium C++ ABI 2.9.5p6c: + // __base_count is a word with the number of direct proper base class + // descriptions that follow. + Fields.push_back(llvm::ConstantInt::get(UnsignedIntLTy, RD->getNumBases())); + + if (!RD->getNumBases()) + return; + + llvm::Type *LongLTy = + CGM.getTypes().ConvertType(CGM.getContext().LongTy); + + // Now add the base class descriptions. + + // Itanium C++ ABI 2.9.5p6c: + // __base_info[] is an array of base class descriptions -- one for every + // direct proper base. Each description is of the type: + // + // struct abi::__base_class_type_info { + // public: + // const __class_type_info *__base_type; + // long __offset_flags; + // + // enum __offset_flags_masks { + // __virtual_mask = 0x1, + // __public_mask = 0x2, + // __offset_shift = 8 + // }; + // }; + for (const auto &Base : RD->bases()) { + // The __base_type member points to the RTTI for the base type. + Fields.push_back(ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(Base.getType())); + + const CXXRecordDecl *BaseDecl = + cast<CXXRecordDecl>(Base.getType()->getAs<RecordType>()->getDecl()); + + int64_t OffsetFlags = 0; + + // All but the lower 8 bits of __offset_flags are a signed offset. + // For a non-virtual base, this is the offset in the object of the base + // subobject. For a virtual base, this is the offset in the virtual table of + // the virtual base offset for the virtual base referenced (negative). + CharUnits Offset; + if (Base.isVirtual()) + Offset = + CGM.getItaniumVTableContext().getVirtualBaseOffsetOffset(RD, BaseDecl); + else { + const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD); + Offset = Layout.getBaseClassOffset(BaseDecl); + }; + + OffsetFlags = uint64_t(Offset.getQuantity()) << 8; + + // The low-order byte of __offset_flags contains flags, as given by the + // masks from the enumeration __offset_flags_masks. + if (Base.isVirtual()) + OffsetFlags |= BCTI_Virtual; + if (Base.getAccessSpecifier() == AS_public) + OffsetFlags |= BCTI_Public; + + Fields.push_back(llvm::ConstantInt::get(LongLTy, OffsetFlags)); + } +} + +/// BuildPointerTypeInfo - Build an abi::__pointer_type_info struct, +/// used for pointer types. 
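Before the implementation below, a small worked example of the abi::__pointer_type_info fields it would produce; the class names are hypothetical and the flag values come from the PTI_* enumeration declared earlier.

struct Widget {};   // a complete class type
struct Opaque;      // declared but never defined
// type_info for 'const Widget *':
//   __flags   = PTI_Const (0x1)         -- const qualifier on the pointee
//   __pointee = typeinfo for 'Widget'   -- pointee with qualifiers stripped
// type_info for 'Opaque **':
//   __flags   = PTI_Incomplete (0x8)    -- the pointee chain reaches an
//                                          incomplete class type
//   __pointee = typeinfo for 'Opaque *'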
+void ItaniumRTTIBuilder::BuildPointerTypeInfo(QualType PointeeTy) { + Qualifiers Quals; + QualType UnqualifiedPointeeTy = + CGM.getContext().getUnqualifiedArrayType(PointeeTy, Quals); + + // Itanium C++ ABI 2.9.5p7: + // __flags is a flag word describing the cv-qualification and other + // attributes of the type pointed to + unsigned Flags = ComputeQualifierFlags(Quals); + + // Itanium C++ ABI 2.9.5p7: + // When the abi::__pbase_type_info is for a direct or indirect pointer to an + // incomplete class type, the incomplete target type flag is set. + if (ContainsIncompleteClassType(UnqualifiedPointeeTy)) + Flags |= PTI_Incomplete; + + llvm::Type *UnsignedIntLTy = + CGM.getTypes().ConvertType(CGM.getContext().UnsignedIntTy); + Fields.push_back(llvm::ConstantInt::get(UnsignedIntLTy, Flags)); + + // Itanium C++ ABI 2.9.5p7: + // __pointee is a pointer to the std::type_info derivation for the + // unqualified type being pointed to. + llvm::Constant *PointeeTypeInfo = + ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(UnqualifiedPointeeTy); + Fields.push_back(PointeeTypeInfo); +} + +/// BuildPointerToMemberTypeInfo - Build an abi::__pointer_to_member_type_info +/// struct, used for member pointer types. +void +ItaniumRTTIBuilder::BuildPointerToMemberTypeInfo(const MemberPointerType *Ty) { + QualType PointeeTy = Ty->getPointeeType(); + + Qualifiers Quals; + QualType UnqualifiedPointeeTy = + CGM.getContext().getUnqualifiedArrayType(PointeeTy, Quals); + + // Itanium C++ ABI 2.9.5p7: + // __flags is a flag word describing the cv-qualification and other + // attributes of the type pointed to. + unsigned Flags = ComputeQualifierFlags(Quals); + + const RecordType *ClassType = cast<RecordType>(Ty->getClass()); + + // Itanium C++ ABI 2.9.5p7: + // When the abi::__pbase_type_info is for a direct or indirect pointer to an + // incomplete class type, the incomplete target type flag is set. + if (ContainsIncompleteClassType(UnqualifiedPointeeTy)) + Flags |= PTI_Incomplete; + + if (IsIncompleteClassType(ClassType)) + Flags |= PTI_ContainingClassIncomplete; + + llvm::Type *UnsignedIntLTy = + CGM.getTypes().ConvertType(CGM.getContext().UnsignedIntTy); + Fields.push_back(llvm::ConstantInt::get(UnsignedIntLTy, Flags)); + + // Itanium C++ ABI 2.9.5p7: + // __pointee is a pointer to the std::type_info derivation for the + // unqualified type being pointed to. + llvm::Constant *PointeeTypeInfo = + ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(UnqualifiedPointeeTy); + Fields.push_back(PointeeTypeInfo); + + // Itanium C++ ABI 2.9.5p9: + // __context is a pointer to an abi::__class_type_info corresponding to the + // class type containing the member pointed to + // (e.g., the "A" in "int A::*"). 
+ Fields.push_back( + ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(QualType(ClassType, 0))); +} + +llvm::Constant *ItaniumCXXABI::getAddrOfRTTIDescriptor(QualType Ty) { + return ItaniumRTTIBuilder(*this).BuildTypeInfo(Ty); +} + +void ItaniumCXXABI::EmitFundamentalRTTIDescriptor(QualType Type) { + QualType PointerType = getContext().getPointerType(Type); + QualType PointerTypeConst = getContext().getPointerType(Type.withConst()); + ItaniumRTTIBuilder(*this).BuildTypeInfo(Type, true); + ItaniumRTTIBuilder(*this).BuildTypeInfo(PointerType, true); + ItaniumRTTIBuilder(*this).BuildTypeInfo(PointerTypeConst, true); +} + +void ItaniumCXXABI::EmitFundamentalRTTIDescriptors() { + QualType FundamentalTypes[] = { + getContext().VoidTy, getContext().NullPtrTy, + getContext().BoolTy, getContext().WCharTy, + getContext().CharTy, getContext().UnsignedCharTy, + getContext().SignedCharTy, getContext().ShortTy, + getContext().UnsignedShortTy, getContext().IntTy, + getContext().UnsignedIntTy, getContext().LongTy, + getContext().UnsignedLongTy, getContext().LongLongTy, + getContext().UnsignedLongLongTy, getContext().HalfTy, + getContext().FloatTy, getContext().DoubleTy, + getContext().LongDoubleTy, getContext().Char16Ty, + getContext().Char32Ty, + }; + for (const QualType &FundamentalType : FundamentalTypes) + EmitFundamentalRTTIDescriptor(FundamentalType); +} + +/// What sort of uniqueness rules should we use for the RTTI for the +/// given type? +ItaniumCXXABI::RTTIUniquenessKind ItaniumCXXABI::classifyRTTIUniqueness( + QualType CanTy, llvm::GlobalValue::LinkageTypes Linkage) const { + if (shouldRTTIBeUnique()) + return RUK_Unique; + + // It's only necessary for linkonce_odr or weak_odr linkage. + if (Linkage != llvm::GlobalValue::LinkOnceODRLinkage && + Linkage != llvm::GlobalValue::WeakODRLinkage) + return RUK_Unique; + + // It's only necessary with default visibility. + if (CanTy->getVisibility() != DefaultVisibility) + return RUK_Unique; + + // If we're not required to publish this symbol, hide it. + if (Linkage == llvm::GlobalValue::LinkOnceODRLinkage) + return RUK_NonUniqueHidden; + + // If we're required to publish this symbol, as we might be under an + // explicit instantiation, leave it with default visibility but + // enable string-comparisons. + assert(Linkage == llvm::GlobalValue::WeakODRLinkage); + return RUK_NonUniqueVisible; +} diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp index 7318fe72a3c3..a69d4dd3fe86 100644 --- a/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/lib/CodeGen/MicrosoftCXXABI.cpp @@ -15,64 +15,92 @@ //===----------------------------------------------------------------------===// #include "CGCXXABI.h" -#include "CodeGenModule.h" #include "CGVTables.h" -#include "MicrosoftVBTables.h" +#include "CodeGenModule.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/VTableBuilder.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" +#include "llvm/IR/CallSite.h" using namespace clang; using namespace CodeGen; namespace { +/// Holds all the vbtable globals for a given class. 
+struct VBTableGlobals { + const VPtrInfoVector *VBTables; + SmallVector<llvm::GlobalVariable *, 2> Globals; +}; + class MicrosoftCXXABI : public CGCXXABI { public: - MicrosoftCXXABI(CodeGenModule &CGM) : CGCXXABI(CGM) {} + MicrosoftCXXABI(CodeGenModule &CGM) + : CGCXXABI(CGM), BaseClassDescriptorType(nullptr), + ClassHierarchyDescriptorType(nullptr), + CompleteObjectLocatorType(nullptr) {} - bool HasThisReturn(GlobalDecl GD) const; + bool HasThisReturn(GlobalDecl GD) const override; - bool isReturnTypeIndirect(const CXXRecordDecl *RD) const { - // Structures that are not C++03 PODs are always indirect. - return !RD->isPOD(); - } + bool classifyReturnType(CGFunctionInfo &FI) const override; - RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const { - if (RD->hasNonTrivialCopyConstructor() || RD->hasNonTrivialDestructor()) - return RAA_DirectInMemory; - return RAA_Default; - } + RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const override; - StringRef GetPureVirtualCallName() { return "_purecall"; } + bool isSRetParameterAfterThis() const override { return true; } + + StringRef GetPureVirtualCallName() override { return "_purecall"; } // No known support for deleted functions in MSVC yet, so this choice is // arbitrary. - StringRef GetDeletedVirtualCallName() { return "_purecall"; } - - bool isInlineInitializedStaticDataMemberLinkOnce() { return true; } + StringRef GetDeletedVirtualCallName() override { return "_purecall"; } llvm::Value *adjustToCompleteObject(CodeGenFunction &CGF, llvm::Value *ptr, - QualType type); + QualType type) override; - llvm::Value *GetVirtualBaseClassOffset(CodeGenFunction &CGF, - llvm::Value *This, - const CXXRecordDecl *ClassDecl, - const CXXRecordDecl *BaseClassDecl); + llvm::GlobalVariable *getMSCompleteObjectLocator(const CXXRecordDecl *RD, + const VPtrInfo *Info); + + llvm::Constant *getAddrOfRTTIDescriptor(QualType Ty) override; + + bool shouldTypeidBeNullChecked(bool IsDeref, QualType SrcRecordTy) override; + void EmitBadTypeidCall(CodeGenFunction &CGF) override; + llvm::Value *EmitTypeid(CodeGenFunction &CGF, QualType SrcRecordTy, + llvm::Value *ThisPtr, + llvm::Type *StdTypeInfoPtrTy) override; + + bool shouldDynamicCastCallBeNullChecked(bool SrcIsPtr, + QualType SrcRecordTy) override; + + llvm::Value *EmitDynamicCastCall(CodeGenFunction &CGF, llvm::Value *Value, + QualType SrcRecordTy, QualType DestTy, + QualType DestRecordTy, + llvm::BasicBlock *CastEnd) override; + + llvm::Value *EmitDynamicCastToVoid(CodeGenFunction &CGF, llvm::Value *Value, + QualType SrcRecordTy, + QualType DestTy) override; + + bool EmitBadCastCall(CodeGenFunction &CGF) override; + + llvm::Value * + GetVirtualBaseClassOffset(CodeGenFunction &CGF, llvm::Value *This, + const CXXRecordDecl *ClassDecl, + const CXXRecordDecl *BaseClassDecl) override; void BuildConstructorSignature(const CXXConstructorDecl *Ctor, - CXXCtorType Type, - CanQualType &ResTy, - SmallVectorImpl<CanQualType> &ArgTys); + CXXCtorType Type, CanQualType &ResTy, + SmallVectorImpl<CanQualType> &ArgTys) override; - llvm::BasicBlock *EmitCtorCompleteObjectHandler(CodeGenFunction &CGF, - const CXXRecordDecl *RD); + llvm::BasicBlock * + EmitCtorCompleteObjectHandler(CodeGenFunction &CGF, + const CXXRecordDecl *RD) override; void initializeHiddenVirtualInheritanceMembers(CodeGenFunction &CGF, - const CXXRecordDecl *RD); + const CXXRecordDecl *RD) override; - void EmitCXXConstructors(const CXXConstructorDecl *D); + void EmitCXXConstructors(const CXXConstructorDecl *D) override; // Background on MSVC 
destructors // ============================== @@ -109,17 +137,18 @@ public: void BuildDestructorSignature(const CXXDestructorDecl *Dtor, CXXDtorType Type, CanQualType &ResTy, - SmallVectorImpl<CanQualType> &ArgTys); + SmallVectorImpl<CanQualType> &ArgTys) override; /// Non-base dtors should be emitted as delegating thunks in this ABI. bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor, - CXXDtorType DT) const { + CXXDtorType DT) const override { return DT != Dtor_Base; } - void EmitCXXDestructors(const CXXDestructorDecl *D); + void EmitCXXDestructors(const CXXDestructorDecl *D) override; - const CXXRecordDecl *getThisArgumentTypeForMethod(const CXXMethodDecl *MD) { + const CXXRecordDecl * + getThisArgumentTypeForMethod(const CXXMethodDecl *MD) override { MD = MD->getCanonicalDecl(); if (MD->isVirtual() && !isa<CXXDestructorDecl>(MD)) { MicrosoftVTableContext::MethodVFTableLocation ML = @@ -131,76 +160,100 @@ public: // FIXME: might want to have a more precise type in the non-virtual // multiple inheritance case. if (ML.VBase || !ML.VFPtrOffset.isZero()) - return 0; + return nullptr; } return MD->getParent(); } - llvm::Value *adjustThisArgumentForVirtualCall(CodeGenFunction &CGF, - GlobalDecl GD, - llvm::Value *This); + llvm::Value * + adjustThisArgumentForVirtualFunctionCall(CodeGenFunction &CGF, GlobalDecl GD, + llvm::Value *This, + bool VirtualCall) override; - void BuildInstanceFunctionParams(CodeGenFunction &CGF, - QualType &ResTy, - FunctionArgList &Params); + void addImplicitStructorParams(CodeGenFunction &CGF, QualType &ResTy, + FunctionArgList &Params) override; llvm::Value *adjustThisParameterInVirtualFunctionPrologue( - CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This); + CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This) override; - void EmitInstanceFunctionProlog(CodeGenFunction &CGF); + void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override; - void EmitConstructorCall(CodeGenFunction &CGF, - const CXXConstructorDecl *D, CXXCtorType Type, - bool ForVirtualBase, bool Delegating, - llvm::Value *This, - CallExpr::const_arg_iterator ArgBeg, - CallExpr::const_arg_iterator ArgEnd); + unsigned addImplicitConstructorArgs(CodeGenFunction &CGF, + const CXXConstructorDecl *D, + CXXCtorType Type, bool ForVirtualBase, + bool Delegating, + CallArgList &Args) override; - void emitVTableDefinitions(CodeGenVTables &CGVT, const CXXRecordDecl *RD); + void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD, + CXXDtorType Type, bool ForVirtualBase, + bool Delegating, llvm::Value *This) override; + + void emitVTableDefinitions(CodeGenVTables &CGVT, + const CXXRecordDecl *RD) override; llvm::Value *getVTableAddressPointInStructor( CodeGenFunction &CGF, const CXXRecordDecl *VTableClass, BaseSubobject Base, const CXXRecordDecl *NearestVBase, - bool &NeedsVirtualOffset); + bool &NeedsVirtualOffset) override; llvm::Constant * getVTableAddressPointForConstExpr(BaseSubobject Base, - const CXXRecordDecl *VTableClass); + const CXXRecordDecl *VTableClass) override; llvm::GlobalVariable *getAddrOfVTable(const CXXRecordDecl *RD, - CharUnits VPtrOffset); + CharUnits VPtrOffset) override; llvm::Value *getVirtualFunctionPointer(CodeGenFunction &CGF, GlobalDecl GD, - llvm::Value *This, llvm::Type *Ty); + llvm::Value *This, + llvm::Type *Ty) override; void EmitVirtualDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *Dtor, CXXDtorType DtorType, SourceLocation CallLoc, - llvm::Value *This); + llvm::Value *This) override; void 
adjustCallArgsForDestructorThunk(CodeGenFunction &CGF, GlobalDecl GD, - CallArgList &CallArgs) { + CallArgList &CallArgs) override { assert(GD.getDtorType() == Dtor_Deleting && "Only deleting destructor thunks are available in this ABI"); CallArgs.add(RValue::get(getStructorImplicitParamValue(CGF)), CGM.getContext().IntTy); } - void emitVirtualInheritanceTables(const CXXRecordDecl *RD); + void emitVirtualInheritanceTables(const CXXRecordDecl *RD) override; + + llvm::GlobalVariable * + getAddrOfVBTable(const VPtrInfo &VBT, const CXXRecordDecl *RD, + llvm::GlobalVariable::LinkageTypes Linkage); + + void emitVBTableDefinition(const VPtrInfo &VBT, const CXXRecordDecl *RD, + llvm::GlobalVariable *GV) const; + + void setThunkLinkage(llvm::Function *Thunk, bool ForVTable, + GlobalDecl GD, bool ReturnAdjustment) override { + // Never dllimport/dllexport thunks. + Thunk->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); - void setThunkLinkage(llvm::Function *Thunk, bool ForVTable) { - Thunk->setLinkage(llvm::GlobalValue::WeakAnyLinkage); + GVALinkage Linkage = + getContext().GetGVALinkageForFunction(cast<FunctionDecl>(GD.getDecl())); + + if (Linkage == GVA_Internal) + Thunk->setLinkage(llvm::GlobalValue::InternalLinkage); + else if (ReturnAdjustment) + Thunk->setLinkage(llvm::GlobalValue::WeakODRLinkage); + else + Thunk->setLinkage(llvm::GlobalValue::LinkOnceODRLinkage); } llvm::Value *performThisAdjustment(CodeGenFunction &CGF, llvm::Value *This, - const ThisAdjustment &TA); + const ThisAdjustment &TA) override; llvm::Value *performReturnAdjustment(CodeGenFunction &CGF, llvm::Value *Ret, - const ReturnAdjustment &RA); + const ReturnAdjustment &RA) override; void EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, llvm::GlobalVariable *DeclPtr, - bool PerformInit); + bool PerformInit) override; // ==== Notes on array cookies ========= // @@ -225,17 +278,126 @@ public: // } // Whereas it prints "104" and "104" if you give A a destructor. - bool requiresArrayCookie(const CXXDeleteExpr *expr, QualType elementType); - bool requiresArrayCookie(const CXXNewExpr *expr); - CharUnits getArrayCookieSizeImpl(QualType type); + bool requiresArrayCookie(const CXXDeleteExpr *expr, + QualType elementType) override; + bool requiresArrayCookie(const CXXNewExpr *expr) override; + CharUnits getArrayCookieSizeImpl(QualType type) override; llvm::Value *InitializeArrayCookie(CodeGenFunction &CGF, llvm::Value *NewPtr, llvm::Value *NumElements, const CXXNewExpr *expr, - QualType ElementType); + QualType ElementType) override; llvm::Value *readArrayCookieImpl(CodeGenFunction &CGF, llvm::Value *allocPtr, - CharUnits cookieSize); + CharUnits cookieSize) override; + + friend struct MSRTTIBuilder; + + bool isImageRelative() const { + return CGM.getTarget().getPointerWidth(/*AddressSpace=*/0) == 64; + } + + // 5 routines for constructing the llvm types for MS RTTI structs. 
+ llvm::StructType *getTypeDescriptorType(StringRef TypeInfoString) { + llvm::SmallString<32> TDTypeName("rtti.TypeDescriptor"); + TDTypeName += llvm::utostr(TypeInfoString.size()); + llvm::StructType *&TypeDescriptorType = + TypeDescriptorTypeMap[TypeInfoString.size()]; + if (TypeDescriptorType) + return TypeDescriptorType; + llvm::Type *FieldTypes[] = { + CGM.Int8PtrPtrTy, + CGM.Int8PtrTy, + llvm::ArrayType::get(CGM.Int8Ty, TypeInfoString.size() + 1)}; + TypeDescriptorType = + llvm::StructType::create(CGM.getLLVMContext(), FieldTypes, TDTypeName); + return TypeDescriptorType; + } + + llvm::Type *getImageRelativeType(llvm::Type *PtrType) { + if (!isImageRelative()) + return PtrType; + return CGM.IntTy; + } + + llvm::StructType *getBaseClassDescriptorType() { + if (BaseClassDescriptorType) + return BaseClassDescriptorType; + llvm::Type *FieldTypes[] = { + getImageRelativeType(CGM.Int8PtrTy), + CGM.IntTy, + CGM.IntTy, + CGM.IntTy, + CGM.IntTy, + CGM.IntTy, + getImageRelativeType(getClassHierarchyDescriptorType()->getPointerTo()), + }; + BaseClassDescriptorType = llvm::StructType::create( + CGM.getLLVMContext(), FieldTypes, "rtti.BaseClassDescriptor"); + return BaseClassDescriptorType; + } + + llvm::StructType *getClassHierarchyDescriptorType() { + if (ClassHierarchyDescriptorType) + return ClassHierarchyDescriptorType; + // Forward-declare RTTIClassHierarchyDescriptor to break a cycle. + ClassHierarchyDescriptorType = llvm::StructType::create( + CGM.getLLVMContext(), "rtti.ClassHierarchyDescriptor"); + llvm::Type *FieldTypes[] = { + CGM.IntTy, + CGM.IntTy, + CGM.IntTy, + getImageRelativeType( + getBaseClassDescriptorType()->getPointerTo()->getPointerTo()), + }; + ClassHierarchyDescriptorType->setBody(FieldTypes); + return ClassHierarchyDescriptorType; + } + + llvm::StructType *getCompleteObjectLocatorType() { + if (CompleteObjectLocatorType) + return CompleteObjectLocatorType; + CompleteObjectLocatorType = llvm::StructType::create( + CGM.getLLVMContext(), "rtti.CompleteObjectLocator"); + llvm::Type *FieldTypes[] = { + CGM.IntTy, + CGM.IntTy, + CGM.IntTy, + getImageRelativeType(CGM.Int8PtrTy), + getImageRelativeType(getClassHierarchyDescriptorType()->getPointerTo()), + getImageRelativeType(CompleteObjectLocatorType), + }; + llvm::ArrayRef<llvm::Type *> FieldTypesRef(FieldTypes); + if (!isImageRelative()) + FieldTypesRef = FieldTypesRef.drop_back(); + CompleteObjectLocatorType->setBody(FieldTypesRef); + return CompleteObjectLocatorType; + } + + llvm::GlobalVariable *getImageBase() { + StringRef Name = "__ImageBase"; + if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(Name)) + return GV; + + return new llvm::GlobalVariable(CGM.getModule(), CGM.Int8Ty, + /*isConstant=*/true, + llvm::GlobalValue::ExternalLinkage, + /*Initializer=*/nullptr, Name); + } + + llvm::Constant *getImageRelativeConstant(llvm::Constant *PtrVal) { + if (!isImageRelative()) + return PtrVal; + + llvm::Constant *ImageBaseAsInt = + llvm::ConstantExpr::getPtrToInt(getImageBase(), CGM.IntPtrTy); + llvm::Constant *PtrValAsInt = + llvm::ConstantExpr::getPtrToInt(PtrVal, CGM.IntPtrTy); + llvm::Constant *Diff = + llvm::ConstantExpr::getSub(PtrValAsInt, ImageBaseAsInt, + /*HasNUW=*/true, /*HasNSW=*/true); + return llvm::ConstantExpr::getTrunc(Diff, CGM.IntTy); + } private: MicrosoftMangleContext &getMangleContext() { @@ -258,15 +420,12 @@ private: return C ? 
C : getZeroInt(); } + CharUnits getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD); + void GetNullMemberPointerFields(const MemberPointerType *MPT, llvm::SmallVectorImpl<llvm::Constant *> &fields); - /// \brief Finds the offset from the base of RD to the vbptr it uses, even if - /// it is reusing a vbptr from a non-virtual base. RD must have morally - /// virtual bases. - CharUnits GetVBPtrOffsetFromBases(const CXXRecordDecl *RD); - /// \brief Shared code for virtual base adjustment. Returns the offset from /// the vbptr to the virtual base. Optionally returns the address of the /// vbptr itself. @@ -274,13 +433,13 @@ private: llvm::Value *Base, llvm::Value *VBPtrOffset, llvm::Value *VBTableOffset, - llvm::Value **VBPtr = 0); + llvm::Value **VBPtr = nullptr); llvm::Value *GetVBaseOffsetFromVBPtr(CodeGenFunction &CGF, llvm::Value *Base, int32_t VBPtrOffset, int32_t VBTableOffset, - llvm::Value **VBPtr = 0) { + llvm::Value **VBPtr = nullptr) { llvm::Value *VBPOffset = llvm::ConstantInt::get(CGM.IntTy, VBPtrOffset), *VBTOffset = llvm::ConstantInt::get(CGM.IntTy, VBTableOffset); return GetVBaseOffsetFromVBPtr(CGF, Base, VBPOffset, VBTOffset, VBPtr); @@ -288,8 +447,8 @@ private: /// \brief Performs a full virtual base adjustment. Used to dereference /// pointers to members of virtual bases. - llvm::Value *AdjustVirtualBase(CodeGenFunction &CGF, const CXXRecordDecl *RD, - llvm::Value *Base, + llvm::Value *AdjustVirtualBase(CodeGenFunction &CGF, const Expr *E, + const CXXRecordDecl *RD, llvm::Value *Base, llvm::Value *VirtualBaseAdjustmentOffset, llvm::Value *VBPtrOffset /* optional */); @@ -311,69 +470,71 @@ private: void EmitVBPtrStores(CodeGenFunction &CGF, const CXXRecordDecl *RD); /// \brief Caching wrapper around VBTableBuilder::enumerateVBTables(). - const VBTableVector &EnumerateVBTables(const CXXRecordDecl *RD); + const VBTableGlobals &enumerateVBTables(const CXXRecordDecl *RD); /// \brief Generate a thunk for calling a virtual member function MD. 
- llvm::Function *EmitVirtualMemPtrThunk(const CXXMethodDecl *MD, - StringRef ThunkName); + llvm::Function *EmitVirtualMemPtrThunk( + const CXXMethodDecl *MD, + const MicrosoftVTableContext::MethodVFTableLocation &ML); public: - virtual llvm::Type *ConvertMemberPointerType(const MemberPointerType *MPT); + llvm::Type *ConvertMemberPointerType(const MemberPointerType *MPT) override; - virtual bool isZeroInitializable(const MemberPointerType *MPT); + bool isZeroInitializable(const MemberPointerType *MPT) override; - virtual llvm::Constant *EmitNullMemberPointer(const MemberPointerType *MPT); + llvm::Constant *EmitNullMemberPointer(const MemberPointerType *MPT) override; - virtual llvm::Constant *EmitMemberDataPointer(const MemberPointerType *MPT, - CharUnits offset); - virtual llvm::Constant *EmitMemberPointer(const CXXMethodDecl *MD); - virtual llvm::Constant *EmitMemberPointer(const APValue &MP, QualType MPT); + llvm::Constant *EmitMemberDataPointer(const MemberPointerType *MPT, + CharUnits offset) override; + llvm::Constant *EmitMemberPointer(const CXXMethodDecl *MD) override; + llvm::Constant *EmitMemberPointer(const APValue &MP, QualType MPT) override; - virtual llvm::Value *EmitMemberPointerComparison(CodeGenFunction &CGF, - llvm::Value *L, - llvm::Value *R, - const MemberPointerType *MPT, - bool Inequality); + llvm::Value *EmitMemberPointerComparison(CodeGenFunction &CGF, + llvm::Value *L, + llvm::Value *R, + const MemberPointerType *MPT, + bool Inequality) override; - virtual llvm::Value *EmitMemberPointerIsNotNull(CodeGenFunction &CGF, - llvm::Value *MemPtr, - const MemberPointerType *MPT); + llvm::Value *EmitMemberPointerIsNotNull(CodeGenFunction &CGF, + llvm::Value *MemPtr, + const MemberPointerType *MPT) override; - virtual llvm::Value *EmitMemberDataPointerAddress(CodeGenFunction &CGF, - llvm::Value *Base, - llvm::Value *MemPtr, - const MemberPointerType *MPT); + llvm::Value * + EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E, + llvm::Value *Base, llvm::Value *MemPtr, + const MemberPointerType *MPT) override; - virtual llvm::Value *EmitMemberPointerConversion(CodeGenFunction &CGF, - const CastExpr *E, - llvm::Value *Src); + llvm::Value *EmitMemberPointerConversion(CodeGenFunction &CGF, + const CastExpr *E, + llvm::Value *Src) override; - virtual llvm::Constant *EmitMemberPointerConversion(const CastExpr *E, - llvm::Constant *Src); + llvm::Constant *EmitMemberPointerConversion(const CastExpr *E, + llvm::Constant *Src) override; - virtual llvm::Value * - EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF, - llvm::Value *&This, - llvm::Value *MemPtr, - const MemberPointerType *MPT); + llvm::Value * + EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF, const Expr *E, + llvm::Value *&This, llvm::Value *MemPtr, + const MemberPointerType *MPT) override; private: typedef std::pair<const CXXRecordDecl *, CharUnits> VFTableIdTy; - typedef llvm::DenseMap<VFTableIdTy, llvm::GlobalVariable *> VFTablesMapTy; + typedef llvm::DenseMap<VFTableIdTy, llvm::GlobalVariable *> VTablesMapTy; + typedef llvm::DenseMap<VFTableIdTy, llvm::GlobalValue *> VFTablesMapTy; /// \brief All the vftables that have been referenced. VFTablesMapTy VFTablesMap; + VTablesMapTy VTablesMap; /// \brief This set holds the record decls we've deferred vtable emission for. llvm::SmallPtrSet<const CXXRecordDecl *, 4> DeferredVFTables; /// \brief All the vbtables which have been referenced. 
- llvm::DenseMap<const CXXRecordDecl *, VBTableVector> VBTablesMap; + llvm::DenseMap<const CXXRecordDecl *, VBTableGlobals> VBTablesMap; /// Info on the global variable used to guard initialization of static locals. /// The BitIndex field is only used for externally invisible declarations. struct GuardInfo { - GuardInfo() : Guard(0), BitIndex(0) {} + GuardInfo() : Guard(nullptr), BitIndex(0) {} llvm::GlobalVariable *Guard; unsigned BitIndex; }; @@ -381,10 +542,69 @@ private: /// Map from DeclContext to the current guard variable. We assume that the /// AST is visited in source code order. llvm::DenseMap<const DeclContext *, GuardInfo> GuardVariableMap; + + llvm::DenseMap<size_t, llvm::StructType *> TypeDescriptorTypeMap; + llvm::StructType *BaseClassDescriptorType; + llvm::StructType *ClassHierarchyDescriptorType; + llvm::StructType *CompleteObjectLocatorType; }; } +CGCXXABI::RecordArgABI +MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const { + switch (CGM.getTarget().getTriple().getArch()) { + default: + // FIXME: Implement for other architectures. + return RAA_Default; + + case llvm::Triple::x86: + // All record arguments are passed in memory on x86. Decide whether to + // construct the object directly in argument memory, or to construct the + // argument elsewhere and copy the bytes during the call. + + // If C++ prohibits us from making a copy, construct the arguments directly + // into argument memory. + if (!canCopyArgument(RD)) + return RAA_DirectInMemory; + + // Otherwise, construct the argument into a temporary and copy the bytes + // into the outgoing argument memory. + return RAA_Default; + + case llvm::Triple::x86_64: + // Win64 passes objects with non-trivial copy ctors indirectly. + if (RD->hasNonTrivialCopyConstructor()) + return RAA_Indirect; + + // Win64 passes objects larger than 8 bytes indirectly. + if (getContext().getTypeSize(RD->getTypeForDecl()) > 64) + return RAA_Indirect; + + // We have a trivial copy constructor or no copy constructors, but we have + // to make sure it isn't deleted. + bool CopyDeleted = false; + for (const CXXConstructorDecl *CD : RD->ctors()) { + if (CD->isCopyConstructor()) { + assert(CD->isTrivial()); + // We had at least one undeleted trivial copy ctor. Return directly. + if (!CD->isDeleted()) + return RAA_Default; + CopyDeleted = true; + } + } + + // The trivial copy constructor was deleted. Return indirectly. + if (CopyDeleted) + return RAA_Indirect; + + // There were no copy ctors. Return in RAX. + return RAA_Default; + } + + llvm_unreachable("invalid enum"); +} + llvm::Value *MicrosoftCXXABI::adjustToCompleteObject(CodeGenFunction &CGF, llvm::Value *ptr, QualType type) { @@ -392,33 +612,127 @@ llvm::Value *MicrosoftCXXABI::adjustToCompleteObject(CodeGenFunction &CGF, return ptr; } -/// \brief Finds the first non-virtual base of RD that has virtual bases. If RD -/// doesn't have a vbptr, it will reuse the vbptr of the returned class. -static const CXXRecordDecl *FindFirstNVBaseWithVBases(const CXXRecordDecl *RD) { - for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(), - E = RD->bases_end(); I != E; ++I) { - const CXXRecordDecl *Base = I->getType()->getAsCXXRecordDecl(); - if (!I->isVirtual() && Base->getNumVBases() > 0) - return Base; - } - llvm_unreachable("RD must have an nv base with vbases"); +/// \brief Gets the offset to the virtual base that contains the vfptr for +/// MS-ABI polymorphic types. 
+static llvm::Value *getPolymorphicOffset(CodeGenFunction &CGF, + const CXXRecordDecl *RD, + llvm::Value *Value) { + const ASTContext &Context = RD->getASTContext(); + for (const CXXBaseSpecifier &Base : RD->vbases()) + if (Context.getASTRecordLayout(Base.getType()->getAsCXXRecordDecl()) + .hasExtendableVFPtr()) + return CGF.CGM.getCXXABI().GetVirtualBaseClassOffset( + CGF, Value, RD, Base.getType()->getAsCXXRecordDecl()); + llvm_unreachable("One of our vbases should be polymorphic."); } -CharUnits MicrosoftCXXABI::GetVBPtrOffsetFromBases(const CXXRecordDecl *RD) { - assert(RD->getNumVBases()); - CharUnits Total = CharUnits::Zero(); - while (RD) { - const ASTRecordLayout &RDLayout = getContext().getASTRecordLayout(RD); - CharUnits VBPtrOffset = RDLayout.getVBPtrOffset(); - // -1 is the sentinel for no vbptr. - if (VBPtrOffset != CharUnits::fromQuantity(-1)) { - Total += VBPtrOffset; - break; - } - RD = FindFirstNVBaseWithVBases(RD); - Total += RDLayout.getBaseClassOffset(RD); - } - return Total; +static std::pair<llvm::Value *, llvm::Value *> +performBaseAdjustment(CodeGenFunction &CGF, llvm::Value *Value, + QualType SrcRecordTy) { + Value = CGF.Builder.CreateBitCast(Value, CGF.Int8PtrTy); + const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl(); + + if (CGF.getContext().getASTRecordLayout(SrcDecl).hasExtendableVFPtr()) + return std::make_pair(Value, llvm::ConstantInt::get(CGF.Int32Ty, 0)); + + // Perform a base adjustment. + llvm::Value *Offset = getPolymorphicOffset(CGF, SrcDecl, Value); + Value = CGF.Builder.CreateInBoundsGEP(Value, Offset); + Offset = CGF.Builder.CreateTrunc(Offset, CGF.Int32Ty); + return std::make_pair(Value, Offset); +} + +bool MicrosoftCXXABI::shouldTypeidBeNullChecked(bool IsDeref, + QualType SrcRecordTy) { + const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl(); + return IsDeref && + !CGM.getContext().getASTRecordLayout(SrcDecl).hasExtendableVFPtr(); +} + +static llvm::CallSite emitRTtypeidCall(CodeGenFunction &CGF, + llvm::Value *Argument) { + llvm::Type *ArgTypes[] = {CGF.Int8PtrTy}; + llvm::FunctionType *FTy = + llvm::FunctionType::get(CGF.Int8PtrTy, ArgTypes, false); + llvm::Value *Args[] = {Argument}; + llvm::Constant *Fn = CGF.CGM.CreateRuntimeFunction(FTy, "__RTtypeid"); + return CGF.EmitRuntimeCallOrInvoke(Fn, Args); +} + +void MicrosoftCXXABI::EmitBadTypeidCall(CodeGenFunction &CGF) { + llvm::CallSite Call = + emitRTtypeidCall(CGF, llvm::Constant::getNullValue(CGM.VoidPtrTy)); + Call.setDoesNotReturn(); + CGF.Builder.CreateUnreachable(); +} + +llvm::Value *MicrosoftCXXABI::EmitTypeid(CodeGenFunction &CGF, + QualType SrcRecordTy, + llvm::Value *ThisPtr, + llvm::Type *StdTypeInfoPtrTy) { + llvm::Value *Offset; + std::tie(ThisPtr, Offset) = performBaseAdjustment(CGF, ThisPtr, SrcRecordTy); + return CGF.Builder.CreateBitCast( + emitRTtypeidCall(CGF, ThisPtr).getInstruction(), StdTypeInfoPtrTy); +} + +bool MicrosoftCXXABI::shouldDynamicCastCallBeNullChecked(bool SrcIsPtr, + QualType SrcRecordTy) { + const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl(); + return SrcIsPtr && + !CGM.getContext().getASTRecordLayout(SrcDecl).hasExtendableVFPtr(); +} + +llvm::Value *MicrosoftCXXABI::EmitDynamicCastCall( + CodeGenFunction &CGF, llvm::Value *Value, QualType SrcRecordTy, + QualType DestTy, QualType DestRecordTy, llvm::BasicBlock *CastEnd) { + llvm::Type *DestLTy = CGF.ConvertType(DestTy); + + llvm::Value *SrcRTTI = + CGF.CGM.GetAddrOfRTTIDescriptor(SrcRecordTy.getUnqualifiedType()); + llvm::Value *DestRTTI = + 
CGF.CGM.GetAddrOfRTTIDescriptor(DestRecordTy.getUnqualifiedType()); + + llvm::Value *Offset; + std::tie(Value, Offset) = performBaseAdjustment(CGF, Value, SrcRecordTy); + + // PVOID __RTDynamicCast( + // PVOID inptr, + // LONG VfDelta, + // PVOID SrcType, + // PVOID TargetType, + // BOOL isReference) + llvm::Type *ArgTypes[] = {CGF.Int8PtrTy, CGF.Int32Ty, CGF.Int8PtrTy, + CGF.Int8PtrTy, CGF.Int32Ty}; + llvm::Constant *Function = CGF.CGM.CreateRuntimeFunction( + llvm::FunctionType::get(CGF.Int8PtrTy, ArgTypes, false), + "__RTDynamicCast"); + llvm::Value *Args[] = { + Value, Offset, SrcRTTI, DestRTTI, + llvm::ConstantInt::get(CGF.Int32Ty, DestTy->isReferenceType())}; + Value = CGF.EmitRuntimeCallOrInvoke(Function, Args).getInstruction(); + return CGF.Builder.CreateBitCast(Value, DestLTy); +} + +llvm::Value * +MicrosoftCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, llvm::Value *Value, + QualType SrcRecordTy, + QualType DestTy) { + llvm::Value *Offset; + std::tie(Value, Offset) = performBaseAdjustment(CGF, Value, SrcRecordTy); + + // PVOID __RTCastToVoid( + // PVOID inptr) + llvm::Type *ArgTypes[] = {CGF.Int8PtrTy}; + llvm::Constant *Function = CGF.CGM.CreateRuntimeFunction( + llvm::FunctionType::get(CGF.Int8PtrTy, ArgTypes, false), + "__RTCastToVoid"); + llvm::Value *Args[] = {Value}; + return CGF.EmitRuntimeCall(Function, Args); +} + +bool MicrosoftCXXABI::EmitBadCastCall(CodeGenFunction &CGF) { + return false; } llvm::Value * @@ -426,7 +740,8 @@ MicrosoftCXXABI::GetVirtualBaseClassOffset(CodeGenFunction &CGF, llvm::Value *This, const CXXRecordDecl *ClassDecl, const CXXRecordDecl *BaseClassDecl) { - int64_t VBPtrChars = GetVBPtrOffsetFromBases(ClassDecl).getQuantity(); + int64_t VBPtrChars = + getContext().getASTRecordLayout(ClassDecl).getVBPtrOffset().getQuantity(); llvm::Value *VBPtrOffset = llvm::ConstantInt::get(CGM.PtrDiffTy, VBPtrChars); CharUnits IntSize = getContext().getTypeSizeInChars(getContext().IntTy); CharUnits VBTableChars = @@ -446,16 +761,41 @@ bool MicrosoftCXXABI::HasThisReturn(GlobalDecl GD) const { return isa<CXXConstructorDecl>(GD.getDecl()); } -void MicrosoftCXXABI::BuildConstructorSignature(const CXXConstructorDecl *Ctor, - CXXCtorType Type, - CanQualType &ResTy, - SmallVectorImpl<CanQualType> &ArgTys) { - // 'this' parameter and 'this' return are already in place +bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const { + const CXXRecordDecl *RD = FI.getReturnType()->getAsCXXRecordDecl(); + if (!RD) + return false; + + if (FI.isInstanceMethod()) { + // If it's an instance method, aggregates are always returned indirectly via + // the second parameter. + FI.getReturnInfo() = ABIArgInfo::getIndirect(0, /*ByVal=*/false); + FI.getReturnInfo().setSRetAfterThis(FI.isInstanceMethod()); + return true; + } else if (!RD->isPOD()) { + // If it's a free function, non-POD types are returned indirectly. + FI.getReturnInfo() = ABIArgInfo::getIndirect(0, /*ByVal=*/false); + return true; + } + + // Otherwise, use the C ABI rules. + return false; +} + +void MicrosoftCXXABI::BuildConstructorSignature( + const CXXConstructorDecl *Ctor, CXXCtorType Type, CanQualType &ResTy, + SmallVectorImpl<CanQualType> &ArgTys) { + + // All parameters are already in place except is_most_derived, which goes + // after 'this' if it's variadic and last if it's not. 
const CXXRecordDecl *Class = Ctor->getParent(); + const FunctionProtoType *FPT = Ctor->getType()->castAs<FunctionProtoType>(); if (Class->getNumVBases()) { - // Constructors of classes with virtual bases take an implicit parameter. - ArgTys.push_back(CGM.getContext().IntTy); + if (FPT->isVariadic()) + ArgTys.insert(ArgTys.begin() + 1, CGM.getContext().IntTy); + else + ArgTys.push_back(CGM.getContext().IntTy); } } @@ -505,7 +845,7 @@ void MicrosoftCXXABI::initializeHiddenVirtualInheritanceMembers( unsigned AS = cast<llvm::PointerType>(getThisValue(CGF)->getType())->getAddressSpace(); - llvm::Value *Int8This = 0; // Initialize lazily. + llvm::Value *Int8This = nullptr; // Initialize lazily. for (VBOffsets::const_iterator I = VBaseMap.begin(), E = VBaseMap.end(); I != E; ++I) { @@ -547,19 +887,23 @@ void MicrosoftCXXABI::EmitVBPtrStores(CodeGenFunction &CGF, const CXXRecordDecl *RD) { llvm::Value *ThisInt8Ptr = CGF.Builder.CreateBitCast(getThisValue(CGF), CGM.Int8PtrTy, "this.int8"); + const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD); - const VBTableVector &VBTables = EnumerateVBTables(RD); - for (VBTableVector::const_iterator I = VBTables.begin(), E = VBTables.end(); - I != E; ++I) { + const VBTableGlobals &VBGlobals = enumerateVBTables(RD); + for (unsigned I = 0, E = VBGlobals.VBTables->size(); I != E; ++I) { + const VPtrInfo *VBT = (*VBGlobals.VBTables)[I]; + llvm::GlobalVariable *GV = VBGlobals.Globals[I]; const ASTRecordLayout &SubobjectLayout = - CGM.getContext().getASTRecordLayout(I->VBPtrSubobject.getBase()); - uint64_t Offs = (I->VBPtrSubobject.getBaseOffset() + - SubobjectLayout.getVBPtrOffset()).getQuantity(); + CGM.getContext().getASTRecordLayout(VBT->BaseWithVPtr); + CharUnits Offs = VBT->NonVirtualOffset; + Offs += SubobjectLayout.getVBPtrOffset(); + if (VBT->getVBaseWithVPtr()) + Offs += Layout.getVBaseClassOffset(VBT->getVBaseWithVPtr()); llvm::Value *VBPtr = - CGF.Builder.CreateConstInBoundsGEP1_64(ThisInt8Ptr, Offs); - VBPtr = CGF.Builder.CreateBitCast(VBPtr, I->GV->getType()->getPointerTo(0), - "vbptr." + I->ReusingBase->getName()); - CGF.Builder.CreateStore(I->GV, VBPtr); + CGF.Builder.CreateConstInBoundsGEP1_64(ThisInt8Ptr, Offs.getQuantity()); + VBPtr = CGF.Builder.CreateBitCast(VBPtr, GV->getType()->getPointerTo(0), + "vbptr." + VBT->ReusingBase->getName()); + CGF.Builder.CreateStore(GV, VBPtr); } } @@ -583,12 +927,61 @@ void MicrosoftCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) { CGM.EmitGlobal(GlobalDecl(D, Dtor_Base)); } -llvm::Value *MicrosoftCXXABI::adjustThisArgumentForVirtualCall( - CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This) { +CharUnits +MicrosoftCXXABI::getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD) { + GD = GD.getCanonicalDecl(); + const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); + + GlobalDecl LookupGD = GD; + if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(MD)) { + // Complete destructors take a pointer to the complete object as a + // parameter, thus don't need this adjustment. + if (GD.getDtorType() == Dtor_Complete) + return CharUnits(); + + // There's no Dtor_Base in vftable but it shares the this adjustment with + // the deleting one, so look it up instead. 
+ LookupGD = GlobalDecl(DD, Dtor_Deleting); + } + + MicrosoftVTableContext::MethodVFTableLocation ML = + CGM.getMicrosoftVTableContext().getMethodVFTableLocation(LookupGD); + CharUnits Adjustment = ML.VFPtrOffset; + + // Normal virtual instance methods need to adjust from the vfptr that first + // defined the virtual method to the virtual base subobject, but destructors + // do not. The vector deleting destructor thunk applies this adjustment for + // us if necessary. + if (isa<CXXDestructorDecl>(MD)) + Adjustment = CharUnits::Zero(); + + if (ML.VBase) { + const ASTRecordLayout &DerivedLayout = + CGM.getContext().getASTRecordLayout(MD->getParent()); + Adjustment += DerivedLayout.getVBaseClassOffset(ML.VBase); + } + + return Adjustment; +} + +llvm::Value *MicrosoftCXXABI::adjustThisArgumentForVirtualFunctionCall( + CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This, bool VirtualCall) { + if (!VirtualCall) { + // If the call of a virtual function is not virtual, we just have to + // compensate for the adjustment the virtual function does in its prologue. + CharUnits Adjustment = getVirtualFunctionPrologueThisAdjustment(GD); + if (Adjustment.isZero()) + return This; + + unsigned AS = cast<llvm::PointerType>(This->getType())->getAddressSpace(); + llvm::Type *charPtrTy = CGF.Int8Ty->getPointerTo(AS); + This = CGF.Builder.CreateBitCast(This, charPtrTy); + assert(Adjustment.isPositive()); + return CGF.Builder.CreateConstGEP1_32(This, Adjustment.getQuantity()); + } + GD = GD.getCanonicalDecl(); const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); - // FIXME: consider splitting the vdtor vs regular method code into two - // functions. GlobalDecl LookupGD = GD; if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(MD)) { @@ -607,51 +1000,18 @@ llvm::Value *MicrosoftCXXABI::adjustThisArgumentForVirtualCall( unsigned AS = cast<llvm::PointerType>(This->getType())->getAddressSpace(); llvm::Type *charPtrTy = CGF.Int8Ty->getPointerTo(AS); CharUnits StaticOffset = ML.VFPtrOffset; - if (ML.VBase) { - bool AvoidVirtualOffset = false; - if (isa<CXXDestructorDecl>(MD) && GD.getDtorType() == Dtor_Base) { - // A base destructor can only be called from a complete destructor of the - // same record type or another destructor of a more derived type; - // or a constructor of the same record type if an exception is thrown. - assert(isa<CXXDestructorDecl>(CGF.CurGD.getDecl()) || - isa<CXXConstructorDecl>(CGF.CurGD.getDecl())); - const CXXRecordDecl *CurRD = - cast<CXXMethodDecl>(CGF.CurGD.getDecl())->getParent(); - - if (MD->getParent() == CurRD) { - if (isa<CXXDestructorDecl>(CGF.CurGD.getDecl())) - assert(CGF.CurGD.getDtorType() == Dtor_Complete); - if (isa<CXXConstructorDecl>(CGF.CurGD.getDecl())) - assert(CGF.CurGD.getCtorType() == Ctor_Complete); - // We're calling the main base dtor from a complete structor, - // so we know the "this" offset statically. - AvoidVirtualOffset = true; - } else { - // Let's see if we try to call a destructor of a non-virtual base. - for (CXXRecordDecl::base_class_const_iterator I = CurRD->bases_begin(), - E = CurRD->bases_end(); I != E; ++I) { - if (I->getType()->getAsCXXRecordDecl() != MD->getParent()) - continue; - // If we call a base destructor for a non-virtual base, we statically - // know where it expects the vfptr and "this" to be. - // The total offset should reflect the adjustment done by - // adjustThisParameterInVirtualFunctionPrologue(). 
- AvoidVirtualOffset = true; - break; - } - } - } - if (AvoidVirtualOffset) { - const ASTRecordLayout &Layout = - CGF.getContext().getASTRecordLayout(MD->getParent()); - StaticOffset += Layout.getVBaseClassOffset(ML.VBase); - } else { - This = CGF.Builder.CreateBitCast(This, charPtrTy); - llvm::Value *VBaseOffset = - GetVirtualBaseClassOffset(CGF, This, MD->getParent(), ML.VBase); - This = CGF.Builder.CreateInBoundsGEP(This, VBaseOffset); - } + // Base destructors expect 'this' to point to the beginning of the base + // subobject, not the first vfptr that happens to contain the virtual dtor. + // However, we still need to apply the virtual base adjustment. + if (isa<CXXDestructorDecl>(MD) && GD.getDtorType() == Dtor_Base) + StaticOffset = CharUnits::Zero(); + + if (ML.VBase) { + This = CGF.Builder.CreateBitCast(This, charPtrTy); + llvm::Value *VBaseOffset = + GetVirtualBaseClassOffset(CGF, This, MD->getParent(), ML.VBase); + This = CGF.Builder.CreateInBoundsGEP(This, VBaseOffset); } if (!StaticOffset.isZero()) { assert(StaticOffset.isPositive()); @@ -678,24 +1038,29 @@ static bool IsDeletingDtor(GlobalDecl GD) { return false; } -void MicrosoftCXXABI::BuildInstanceFunctionParams(CodeGenFunction &CGF, - QualType &ResTy, - FunctionArgList &Params) { - BuildThisParam(CGF, Params); - +void MicrosoftCXXABI::addImplicitStructorParams(CodeGenFunction &CGF, + QualType &ResTy, + FunctionArgList &Params) { ASTContext &Context = getContext(); const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl()); + assert(isa<CXXConstructorDecl>(MD) || isa<CXXDestructorDecl>(MD)); if (isa<CXXConstructorDecl>(MD) && MD->getParent()->getNumVBases()) { ImplicitParamDecl *IsMostDerived - = ImplicitParamDecl::Create(Context, 0, + = ImplicitParamDecl::Create(Context, nullptr, CGF.CurGD.getDecl()->getLocation(), &Context.Idents.get("is_most_derived"), Context.IntTy); - Params.push_back(IsMostDerived); + // The 'most_derived' parameter goes second if the ctor is variadic and last + // if it's not. Dtors can't be variadic. + const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); + if (FPT->isVariadic()) + Params.insert(Params.begin() + 1, IsMostDerived); + else + Params.push_back(IsMostDerived); getStructorImplicitParamDecl(CGF) = IsMostDerived; } else if (IsDeletingDtor(CGF.CurGD)) { ImplicitParamDecl *ShouldDelete - = ImplicitParamDecl::Create(Context, 0, + = ImplicitParamDecl::Create(Context, nullptr, CGF.CurGD.getDecl()->getLocation(), &Context.Idents.get("should_call_delete"), Context.IntTy); @@ -706,36 +1071,12 @@ void MicrosoftCXXABI::BuildInstanceFunctionParams(CodeGenFunction &CGF, llvm::Value *MicrosoftCXXABI::adjustThisParameterInVirtualFunctionPrologue( CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This) { - GD = GD.getCanonicalDecl(); - const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); - - GlobalDecl LookupGD = GD; - if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(MD)) { - // Complete destructors take a pointer to the complete object as a - // parameter, thus don't need this adjustment. - if (GD.getDtorType() == Dtor_Complete) - return This; - - // There's no Dtor_Base in vftable but it shares the this adjustment with - // the deleting one, so look it up instead. - LookupGD = GlobalDecl(DD, Dtor_Deleting); - } - // In this ABI, every virtual function takes a pointer to one of the // subobjects that first defines it as the 'this' parameter, rather than a - // pointer to ther final overrider subobject. 
Thus, we need to adjust it back + // pointer to the final overrider subobject. Thus, we need to adjust it back // to the final overrider subobject before use. // See comments in the MicrosoftVFTableContext implementation for the details. - - MicrosoftVTableContext::MethodVFTableLocation ML = - CGM.getMicrosoftVTableContext().getMethodVFTableLocation(LookupGD); - CharUnits Adjustment = ML.VFPtrOffset; - if (ML.VBase) { - const ASTRecordLayout &DerivedLayout = - CGF.getContext().getASTRecordLayout(MD->getParent()); - Adjustment += DerivedLayout.getVBaseClassOffset(ML.VBase); - } - + CharUnits Adjustment = getVirtualFunctionPrologueThisAdjustment(GD); if (Adjustment.isZero()) return This; @@ -784,61 +1125,81 @@ void MicrosoftCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) { } } -void MicrosoftCXXABI::EmitConstructorCall(CodeGenFunction &CGF, - const CXXConstructorDecl *D, - CXXCtorType Type, - bool ForVirtualBase, - bool Delegating, - llvm::Value *This, - CallExpr::const_arg_iterator ArgBeg, - CallExpr::const_arg_iterator ArgEnd) { +unsigned MicrosoftCXXABI::addImplicitConstructorArgs( + CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type, + bool ForVirtualBase, bool Delegating, CallArgList &Args) { assert(Type == Ctor_Complete || Type == Ctor_Base); - llvm::Value *Callee = CGM.GetAddrOfCXXConstructor(D, Ctor_Complete); - llvm::Value *ImplicitParam = 0; - QualType ImplicitParamTy; - if (D->getParent()->getNumVBases()) { - ImplicitParam = llvm::ConstantInt::get(CGM.Int32Ty, Type == Ctor_Complete); - ImplicitParamTy = getContext().IntTy; + // Check if we need a 'most_derived' parameter. + if (!D->getParent()->getNumVBases()) + return 0; + + // Add the 'most_derived' argument second if we are variadic or last if not. + const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>(); + llvm::Value *MostDerivedArg = + llvm::ConstantInt::get(CGM.Int32Ty, Type == Ctor_Complete); + RValue RV = RValue::get(MostDerivedArg); + if (MostDerivedArg) { + if (FPT->isVariadic()) + Args.insert(Args.begin() + 1, + CallArg(RV, getContext().IntTy, /*needscopy=*/false)); + else + Args.add(RV, getContext().IntTy); + } + + return 1; // Added one arg. +} + +void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF, + const CXXDestructorDecl *DD, + CXXDtorType Type, bool ForVirtualBase, + bool Delegating, llvm::Value *This) { + llvm::Value *Callee = CGM.GetAddrOfCXXDestructor(DD, Type); + + if (DD->isVirtual()) { + assert(Type != CXXDtorType::Dtor_Deleting && + "The deleting destructor should only be called via a virtual call"); + This = adjustThisArgumentForVirtualFunctionCall(CGF, GlobalDecl(DD, Type), + This, false); } // FIXME: Provide a source location here. 
- CGF.EmitCXXMemberCall(D, SourceLocation(), Callee, ReturnValueSlot(), This, - ImplicitParam, ImplicitParamTy, ArgBeg, ArgEnd); + CGF.EmitCXXMemberCall(DD, SourceLocation(), Callee, ReturnValueSlot(), This, + /*ImplicitParam=*/nullptr, + /*ImplicitParamTy=*/QualType(), nullptr, nullptr); } void MicrosoftCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, const CXXRecordDecl *RD) { MicrosoftVTableContext &VFTContext = CGM.getMicrosoftVTableContext(); - MicrosoftVTableContext::VFPtrListTy VFPtrs = VFTContext.getVFPtrOffsets(RD); - llvm::GlobalVariable::LinkageTypes Linkage = CGM.getVTableLinkage(RD); + VPtrInfoVector VFPtrs = VFTContext.getVFPtrOffsets(RD); - for (MicrosoftVTableContext::VFPtrListTy::iterator I = VFPtrs.begin(), - E = VFPtrs.end(); I != E; ++I) { - llvm::GlobalVariable *VTable = getAddrOfVTable(RD, I->VFPtrFullOffset); + for (VPtrInfo *Info : VFPtrs) { + llvm::GlobalVariable *VTable = getAddrOfVTable(RD, Info->FullOffsetInMDC); if (VTable->hasInitializer()) continue; + llvm::Constant *RTTI = getMSCompleteObjectLocator(RD, Info); + const VTableLayout &VTLayout = - VFTContext.getVFTableLayout(RD, I->VFPtrFullOffset); + VFTContext.getVFTableLayout(RD, Info->FullOffsetInMDC); llvm::Constant *Init = CGVT.CreateVTableInitializer( RD, VTLayout.vtable_component_begin(), VTLayout.getNumVTableComponents(), VTLayout.vtable_thunk_begin(), - VTLayout.getNumVTableThunks()); - VTable->setInitializer(Init); + VTLayout.getNumVTableThunks(), RTTI); - VTable->setLinkage(Linkage); - CGM.setTypeVisibility(VTable, RD, CodeGenModule::TVK_ForVTable); + VTable->setInitializer(Init); } } llvm::Value *MicrosoftCXXABI::getVTableAddressPointInStructor( CodeGenFunction &CGF, const CXXRecordDecl *VTableClass, BaseSubobject Base, const CXXRecordDecl *NearestVBase, bool &NeedsVirtualOffset) { - NeedsVirtualOffset = (NearestVBase != 0); + NeedsVirtualOffset = (NearestVBase != nullptr); - llvm::Value *VTableAddressPoint = - getAddrOfVTable(VTableClass, Base.getBaseOffset()); + (void)getAddrOfVTable(VTableClass, Base.getBaseOffset()); + VFTableIdTy ID(VTableClass, Base.getBaseOffset()); + llvm::GlobalValue *VTableAddressPoint = VFTablesMap[ID]; if (!VTableAddressPoint) { assert(Base.getBase()->getNumVBases() && !CGM.getContext().getASTRecordLayout(Base.getBase()).hasOwnVFPtr()); @@ -847,17 +1208,19 @@ llvm::Value *MicrosoftCXXABI::getVTableAddressPointInStructor( } static void mangleVFTableName(MicrosoftMangleContext &MangleContext, - const CXXRecordDecl *RD, const VFPtrInfo &VFPtr, + const CXXRecordDecl *RD, const VPtrInfo *VFPtr, SmallString<256> &Name) { llvm::raw_svector_ostream Out(Name); - MangleContext.mangleCXXVFTable(RD, VFPtr.PathToMangle, Out); + MangleContext.mangleCXXVFTable(RD, VFPtr->MangledPath, Out); } llvm::Constant *MicrosoftCXXABI::getVTableAddressPointForConstExpr( BaseSubobject Base, const CXXRecordDecl *VTableClass) { - llvm::Constant *VTable = getAddrOfVTable(VTableClass, Base.getBaseOffset()); - assert(VTable && "Couldn't find a vftable for the given base?"); - return VTable; + (void)getAddrOfVTable(VTableClass, Base.getBaseOffset()); + VFTableIdTy ID(VTableClass, Base.getBaseOffset()); + llvm::GlobalValue *VFTable = VFTablesMap[ID]; + assert(VFTable && "Couldn't find a vftable for the given base?"); + return VFTable; } llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, @@ -866,18 +1229,16 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, // shouldn't be used in the given record type. 
We want to cache this result in // VFTablesMap, thus a simple zero check is not sufficient. VFTableIdTy ID(RD, VPtrOffset); - VFTablesMapTy::iterator I; + VTablesMapTy::iterator I; bool Inserted; - llvm::tie(I, Inserted) = VFTablesMap.insert( - std::make_pair(ID, static_cast<llvm::GlobalVariable *>(0))); + std::tie(I, Inserted) = VTablesMap.insert(std::make_pair(ID, nullptr)); if (!Inserted) return I->second; llvm::GlobalVariable *&VTable = I->second; MicrosoftVTableContext &VTContext = CGM.getMicrosoftVTableContext(); - const MicrosoftVTableContext::VFPtrListTy &VFPtrs = - VTContext.getVFPtrOffsets(RD); + const VPtrInfoVector &VFPtrs = VTContext.getVFPtrOffsets(RD); if (DeferredVFTables.insert(RD)) { // We haven't processed this record type before. @@ -898,19 +1259,94 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, } for (size_t J = 0, F = VFPtrs.size(); J != F; ++J) { - if (VFPtrs[J].VFPtrFullOffset != VPtrOffset) + if (VFPtrs[J]->FullOffsetInMDC != VPtrOffset) continue; + SmallString<256> VFTableName; + mangleVFTableName(getMangleContext(), RD, VFPtrs[J], VFTableName); + StringRef VTableName = VFTableName; + + uint64_t NumVTableSlots = + VTContext.getVFTableLayout(RD, VFPtrs[J]->FullOffsetInMDC) + .getNumVTableComponents(); + llvm::GlobalValue::LinkageTypes VTableLinkage = + llvm::GlobalValue::ExternalLinkage; + llvm::ArrayType *VTableType = + llvm::ArrayType::get(CGM.Int8PtrTy, NumVTableSlots); + if (getContext().getLangOpts().RTTIData) { + VTableLinkage = llvm::GlobalValue::PrivateLinkage; + VTableName = ""; + } - llvm::ArrayType *ArrayType = llvm::ArrayType::get( - CGM.Int8PtrTy, - VTContext.getVFTableLayout(RD, VFPtrs[J].VFPtrFullOffset) - .getNumVTableComponents()); - - SmallString<256> Name; - mangleVFTableName(getMangleContext(), RD, VFPtrs[J], Name); - VTable = CGM.CreateOrReplaceCXXRuntimeVariable( - Name.str(), ArrayType, llvm::GlobalValue::ExternalLinkage); - VTable->setUnnamedAddr(true); + VTable = CGM.getModule().getNamedGlobal(VFTableName); + if (!VTable) { + // Create a backing variable for the contents of VTable. The VTable may + // or may not include space for a pointer to RTTI data. + llvm::GlobalValue *VFTable = VTable = new llvm::GlobalVariable( + CGM.getModule(), VTableType, /*isConstant=*/true, VTableLinkage, + /*Initializer=*/nullptr, VTableName); + VTable->setUnnamedAddr(true); + + // Only insert a pointer into the VFTable for RTTI data if we are not + // importing it. We never reference the RTTI data directly so there is no + // need to make room for it. + if (getContext().getLangOpts().RTTIData && + !RD->hasAttr<DLLImportAttr>()) { + llvm::Value *GEPIndices[] = {llvm::ConstantInt::get(CGM.IntTy, 0), + llvm::ConstantInt::get(CGM.IntTy, 1)}; + // Create a GEP which points just after the first entry in the VFTable, + // this should be the location of the first virtual method. + llvm::Constant *VTableGEP = + llvm::ConstantExpr::getInBoundsGetElementPtr(VTable, GEPIndices); + // The symbol for the VFTable is an alias to the GEP. It is + // transparent, to other modules, what the nature of this symbol is; all + // that matters is that the alias be the address of the first virtual + // method. + VFTable = llvm::GlobalAlias::create( + cast<llvm::SequentialType>(VTableGEP->getType())->getElementType(), + /*AddressSpace=*/0, llvm::GlobalValue::ExternalLinkage, + VFTableName.str(), VTableGEP, &CGM.getModule()); + } else { + // We don't need a GlobalAlias to be a symbol for the VTable if we won't + // be referencing any RTTI data. 
The GlobalVariable will end up being + // an appropriate definition of the VFTable. + VTable->setName(VFTableName.str()); + } + + VFTable->setUnnamedAddr(true); + if (RD->hasAttr<DLLImportAttr>()) + VFTable->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + else if (RD->hasAttr<DLLExportAttr>()) + VFTable->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + + llvm::GlobalValue::LinkageTypes VFTableLinkage = CGM.getVTableLinkage(RD); + if (VFTable != VTable) { + if (llvm::GlobalValue::isAvailableExternallyLinkage(VFTableLinkage)) { + // AvailableExternally implies that we grabbed the data from another + // executable. No need to stick the alias in a Comdat. + } else if (llvm::GlobalValue::isInternalLinkage(VFTableLinkage) || + llvm::GlobalValue::isWeakODRLinkage(VFTableLinkage) || + llvm::GlobalValue::isLinkOnceODRLinkage(VFTableLinkage)) { + // The alias is going to be dropped into a Comdat, no need to make it + // weak. + if (!llvm::GlobalValue::isInternalLinkage(VFTableLinkage)) + VFTableLinkage = llvm::GlobalValue::ExternalLinkage; + llvm::Comdat *C = + CGM.getModule().getOrInsertComdat(VFTable->getName()); + // We must indicate which VFTable is larger to support linking between + // translation units which do and do not have RTTI data. The largest + // VFTable contains the RTTI data; translation units which reference + // the smaller VFTable always reference it relative to the first + // virtual method. + C->setSelectionKind(llvm::Comdat::Largest); + VTable->setComdat(C); + } else { + llvm_unreachable("unexpected linkage for vftable!"); + } + } + VFTable->setLinkage(VFTableLinkage); + CGM.setGlobalVisibility(VFTable, RD); + VFTablesMap[ID] = VFTable; + } break; } @@ -925,7 +1361,8 @@ llvm::Value *MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, CGBuilderTy &Builder = CGF.Builder; Ty = Ty->getPointerTo()->getPointerTo(); - llvm::Value *VPtr = adjustThisArgumentForVirtualCall(CGF, GD, This); + llvm::Value *VPtr = + adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true); llvm::Value *VTable = CGF.GetVTablePtr(VPtr, Ty); MicrosoftVTableContext::MethodVFTableLocation ML = @@ -955,30 +1392,47 @@ void MicrosoftCXXABI::EmitVirtualDestructorCall(CodeGenFunction &CGF, llvm::ConstantInt::get(llvm::IntegerType::getInt32Ty(CGF.getLLVMContext()), DtorType == Dtor_Deleting); - This = adjustThisArgumentForVirtualCall(CGF, GD, This); + This = adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true); CGF.EmitCXXMemberCall(Dtor, CallLoc, Callee, ReturnValueSlot(), This, - ImplicitParam, Context.IntTy, 0, 0); + ImplicitParam, Context.IntTy, nullptr, nullptr); } -const VBTableVector & -MicrosoftCXXABI::EnumerateVBTables(const CXXRecordDecl *RD) { +const VBTableGlobals & +MicrosoftCXXABI::enumerateVBTables(const CXXRecordDecl *RD) { // At this layer, we can key the cache off of a single class, which is much - // easier than caching at the GlobalVariable layer. - llvm::DenseMap<const CXXRecordDecl*, VBTableVector>::iterator I; - bool added; - llvm::tie(I, added) = VBTablesMap.insert(std::make_pair(RD, VBTableVector())); - VBTableVector &VBTables = I->second; - if (!added) - return VBTables; - - VBTableBuilder(CGM, RD).enumerateVBTables(VBTables); + // easier than caching each vbtable individually. 
+ llvm::DenseMap<const CXXRecordDecl*, VBTableGlobals>::iterator Entry; + bool Added; + std::tie(Entry, Added) = + VBTablesMap.insert(std::make_pair(RD, VBTableGlobals())); + VBTableGlobals &VBGlobals = Entry->second; + if (!Added) + return VBGlobals; + + MicrosoftVTableContext &Context = CGM.getMicrosoftVTableContext(); + VBGlobals.VBTables = &Context.enumerateVBTables(RD); + + // Cache the globals for all vbtables so we don't have to recompute the + // mangled names. + llvm::GlobalVariable::LinkageTypes Linkage = CGM.getVTableLinkage(RD); + for (VPtrInfoVector::const_iterator I = VBGlobals.VBTables->begin(), + E = VBGlobals.VBTables->end(); + I != E; ++I) { + VBGlobals.Globals.push_back(getAddrOfVBTable(**I, RD, Linkage)); + } - return VBTables; + return VBGlobals; } -llvm::Function * -MicrosoftCXXABI::EmitVirtualMemPtrThunk(const CXXMethodDecl *MD, - StringRef ThunkName) { +llvm::Function *MicrosoftCXXABI::EmitVirtualMemPtrThunk( + const CXXMethodDecl *MD, + const MicrosoftVTableContext::MethodVFTableLocation &ML) { + // Calculate the mangled name. + SmallString<256> ThunkName; + llvm::raw_svector_ostream Out(ThunkName); + getMangleContext().mangleVirtualMemPtrThunk(MD, Out); + Out.flush(); + // If the thunk has been generated previously, just return it. if (llvm::GlobalValue *GV = CGM.getModule().getNamedValue(ThunkName)) return cast<llvm::Function>(GV); @@ -1002,24 +1456,127 @@ MicrosoftCXXABI::EmitVirtualMemPtrThunk(const CXXMethodDecl *MD, CodeGenFunction CGF(CGM); CGF.StartThunk(ThunkFn, MD, FnInfo); - // Get to the Callee. + // Load the vfptr and then callee from the vftable. The callee should have + // adjusted 'this' so that the vfptr is at offset zero. llvm::Value *This = CGF.LoadCXXThis(); - llvm::Value *Callee = getVirtualFunctionPointer(CGF, MD, This, ThunkTy); + llvm::Value *VTable = + CGF.GetVTablePtr(This, ThunkTy->getPointerTo()->getPointerTo()); + llvm::Value *VFuncPtr = + CGF.Builder.CreateConstInBoundsGEP1_64(VTable, ML.Index, "vfn"); + llvm::Value *Callee = CGF.Builder.CreateLoad(VFuncPtr); + + unsigned CallingConv; + CodeGen::AttributeListType AttributeList; + CGM.ConstructAttributeList(FnInfo, MD, AttributeList, CallingConv, true); + llvm::AttributeSet Attrs = + llvm::AttributeSet::get(CGF.getLLVMContext(), AttributeList); + + // Do a musttail call with perfect argument forwarding. Any inalloca argument + // will be forwarded in place without any copy. + SmallVector<llvm::Value *, 8> Args; + for (llvm::Argument &A : ThunkFn->args()) + Args.push_back(&A); + llvm::CallInst *Call = CGF.Builder.CreateCall(Callee, Args); + Call->setTailCallKind(llvm::CallInst::TCK_MustTail); + Call->setAttributes(Attrs); + Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); + + if (Call->getType()->isVoidTy()) + CGF.Builder.CreateRetVoid(); + else + CGF.Builder.CreateRet(Call); - // Make the call and return the result. - CGF.EmitCallAndReturnForThunk(MD, Callee, 0); + // Finish the function to maintain CodeGenFunction invariants. + // FIXME: Don't emit unreachable code. 
+ CGF.EmitBlock(CGF.createBasicBlock()); + CGF.FinishFunction(); return ThunkFn; } void MicrosoftCXXABI::emitVirtualInheritanceTables(const CXXRecordDecl *RD) { - const VBTableVector &VBTables = EnumerateVBTables(RD); - llvm::GlobalVariable::LinkageTypes Linkage = CGM.getVTableLinkage(RD); + const VBTableGlobals &VBGlobals = enumerateVBTables(RD); + for (unsigned I = 0, E = VBGlobals.VBTables->size(); I != E; ++I) { + const VPtrInfo *VBT = (*VBGlobals.VBTables)[I]; + llvm::GlobalVariable *GV = VBGlobals.Globals[I]; + emitVBTableDefinition(*VBT, RD, GV); + } +} - for (VBTableVector::const_iterator I = VBTables.begin(), E = VBTables.end(); - I != E; ++I) { - I->EmitVBTableDefinition(CGM, RD, Linkage); +llvm::GlobalVariable * +MicrosoftCXXABI::getAddrOfVBTable(const VPtrInfo &VBT, const CXXRecordDecl *RD, + llvm::GlobalVariable::LinkageTypes Linkage) { + SmallString<256> OutName; + llvm::raw_svector_ostream Out(OutName); + getMangleContext().mangleCXXVBTable(RD, VBT.MangledPath, Out); + Out.flush(); + StringRef Name = OutName.str(); + + llvm::ArrayType *VBTableType = + llvm::ArrayType::get(CGM.IntTy, 1 + VBT.ReusingBase->getNumVBases()); + + assert(!CGM.getModule().getNamedGlobal(Name) && + "vbtable with this name already exists: mangling bug?"); + llvm::GlobalVariable *GV = + CGM.CreateOrReplaceCXXRuntimeVariable(Name, VBTableType, Linkage); + GV->setUnnamedAddr(true); + + if (RD->hasAttr<DLLImportAttr>()) + GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + else if (RD->hasAttr<DLLExportAttr>()) + GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + + return GV; +} + +void MicrosoftCXXABI::emitVBTableDefinition(const VPtrInfo &VBT, + const CXXRecordDecl *RD, + llvm::GlobalVariable *GV) const { + const CXXRecordDecl *ReusingBase = VBT.ReusingBase; + + assert(RD->getNumVBases() && ReusingBase->getNumVBases() && + "should only emit vbtables for classes with vbtables"); + + const ASTRecordLayout &BaseLayout = + CGM.getContext().getASTRecordLayout(VBT.BaseWithVPtr); + const ASTRecordLayout &DerivedLayout = + CGM.getContext().getASTRecordLayout(RD); + + SmallVector<llvm::Constant *, 4> Offsets(1 + ReusingBase->getNumVBases(), + nullptr); + + // The offset from ReusingBase's vbptr to itself always leads. + CharUnits VBPtrOffset = BaseLayout.getVBPtrOffset(); + Offsets[0] = llvm::ConstantInt::get(CGM.IntTy, -VBPtrOffset.getQuantity()); + + MicrosoftVTableContext &Context = CGM.getMicrosoftVTableContext(); + for (const auto &I : ReusingBase->vbases()) { + const CXXRecordDecl *VBase = I.getType()->getAsCXXRecordDecl(); + CharUnits Offset = DerivedLayout.getVBaseClassOffset(VBase); + assert(!Offset.isNegative()); + + // Make it relative to the subobject vbptr. + CharUnits CompleteVBPtrOffset = VBT.NonVirtualOffset + VBPtrOffset; + if (VBT.getVBaseWithVPtr()) + CompleteVBPtrOffset += + DerivedLayout.getVBaseClassOffset(VBT.getVBaseWithVPtr()); + Offset -= CompleteVBPtrOffset; + + unsigned VBIndex = Context.getVBTableIndex(ReusingBase, VBase); + assert(Offsets[VBIndex] == nullptr && "The same vbindex seen twice?"); + Offsets[VBIndex] = llvm::ConstantInt::get(CGM.IntTy, Offset.getQuantity()); } + + assert(Offsets.size() == + cast<llvm::ArrayType>(cast<llvm::PointerType>(GV->getType()) + ->getElementType())->getNumElements()); + llvm::ArrayType *VBTableType = + llvm::ArrayType::get(CGM.IntTy, Offsets.size()); + llvm::Constant *Init = llvm::ConstantArray::get(VBTableType, Offsets); + GV->setInitializer(Init); + + // Set the right visibility. 
+ CGM.setGlobalVisibility(GV, RD); } llvm::Value *MicrosoftCXXABI::performThisAdjustment(CodeGenFunction &CGF, @@ -1149,6 +1706,15 @@ llvm::Value* MicrosoftCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, llvm::GlobalVariable *GV, bool PerformInit) { + // MSVC only uses guards for static locals. + if (!D.isStaticLocal()) { + assert(GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()); + // GlobalOpt is allowed to discard the initializer, so use linkonce_odr. + CGF.CurFn->setLinkage(llvm::GlobalValue::LinkOnceODRLinkage); + CGF.EmitCXXGlobalVarDeclInit(D, GV, PerformInit); + return; + } + // MSVC always uses an i32 bitfield to guard initialization, which is *not* // threadsafe. Since the user may be linking in inline functions compiled by // cl.exe, there's no reason to provide a false sense of security by using @@ -1162,29 +1728,29 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, llvm::ConstantInt *Zero = llvm::ConstantInt::get(GuardTy, 0); // Get the guard variable for this function if we have one already. - GuardInfo &GI = GuardVariableMap[D.getDeclContext()]; + GuardInfo *GI = &GuardVariableMap[D.getDeclContext()]; unsigned BitIndex; - if (D.isExternallyVisible()) { + if (D.isStaticLocal() && D.isExternallyVisible()) { // Externally visible variables have to be numbered in Sema to properly // handle unreachable VarDecls. - BitIndex = getContext().getManglingNumber(&D); + BitIndex = getContext().getStaticLocalNumber(&D); assert(BitIndex > 0); BitIndex--; } else { // Non-externally visible variables are numbered here in CodeGen. - BitIndex = GI.BitIndex++; + BitIndex = GI->BitIndex++; } if (BitIndex >= 32) { if (D.isExternallyVisible()) ErrorUnsupportedABI(CGF, "more than 32 guarded initializations"); BitIndex %= 32; - GI.Guard = 0; + GI->Guard = nullptr; } // Lazily create the i32 bitfield for this function. - if (!GI.Guard) { + if (!GI->Guard) { // Mangle the name for the guard. SmallString<256> GuardName; { @@ -1193,13 +1759,15 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, Out.flush(); } - // Create the guard variable with a zero-initializer. Just absorb linkage - // and visibility from the guarded variable. - GI.Guard = new llvm::GlobalVariable(CGM.getModule(), GuardTy, false, - GV->getLinkage(), Zero, GuardName.str()); - GI.Guard->setVisibility(GV->getVisibility()); + // Create the guard variable with a zero-initializer. Just absorb linkage, + // visibility and dll storage class from the guarded variable. + GI->Guard = + new llvm::GlobalVariable(CGM.getModule(), GuardTy, false, + GV->getLinkage(), Zero, GuardName.str()); + GI->Guard->setVisibility(GV->getVisibility()); + GI->Guard->setDLLStorageClass(GV->getDLLStorageClass()); } else { - assert(GI.Guard->getLinkage() == GV->getLinkage() && + assert(GI->Guard->getLinkage() == GV->getLinkage() && "static local from the same function had different linkage"); } @@ -1211,7 +1779,7 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, // Test our bit from the guard variable. 
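// Illustrative only: the pattern EmitGuardedInit handles, with a rough model
// of the non-threadsafe, MSVC-compatible guard it emits. All guarded static
// locals of one function share a single 32-bit guard word; each variable
// tests and then sets its own bit before running its initializer.
int computeOnce();
int cachedValue() {
  static int value = computeOnce();   // guarded by one bit of the function's guard word
  return value;
}
// Conceptual expansion (names hypothetical):
//   static unsigned guard;                               // zero-initialized bitfield
//   if (!(guard & 1)) { guard |= 1; value = computeOnce(); }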
llvm::ConstantInt *Bit = llvm::ConstantInt::get(GuardTy, 1U << BitIndex); - llvm::LoadInst *LI = Builder.CreateLoad(GI.Guard); + llvm::LoadInst *LI = Builder.CreateLoad(GI->Guard); llvm::Value *IsInitialized = Builder.CreateICmpNE(Builder.CreateAnd(LI, Bit), Zero); llvm::BasicBlock *InitBlock = CGF.createBasicBlock("init"); @@ -1221,7 +1789,7 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, // Set our bit in the guard variable and emit the initializer and add a global // destructor if appropriate. CGF.EmitBlock(InitBlock); - Builder.CreateStore(Builder.CreateOr(LI, Bit), GI.Guard); + Builder.CreateStore(Builder.CreateOr(LI, Bit), GI->Guard); CGF.EmitCXXGlobalVarDeclInit(D, GV, PerformInit); Builder.CreateBr(EndBlock); @@ -1229,38 +1797,6 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, CGF.EmitBlock(EndBlock); } -// Member pointer helpers. -static bool hasVBPtrOffsetField(MSInheritanceModel Inheritance) { - return Inheritance == MSIM_Unspecified; -} - -static bool hasOnlyOneField(bool IsMemberFunction, - MSInheritanceModel Inheritance) { - return Inheritance <= MSIM_SinglePolymorphic || - (!IsMemberFunction && Inheritance <= MSIM_MultiplePolymorphic); -} - -// Only member pointers to functions need a this adjustment, since it can be -// combined with the field offset for data pointers. -static bool hasNonVirtualBaseAdjustmentField(bool IsMemberFunction, - MSInheritanceModel Inheritance) { - return (IsMemberFunction && Inheritance >= MSIM_Multiple); -} - -static bool hasVirtualBaseAdjustmentField(MSInheritanceModel Inheritance) { - return Inheritance >= MSIM_Virtual; -} - -// Use zero for the field offset of a null data member pointer if we can -// guarantee that zero is not a valid field offset, or if the member pointer has -// multiple fields. Polymorphic classes have a vfptr at offset zero, so we can -// use zero for null. If there are multiple fields, we can use zero even if it -// is a valid field offset because null-ness testing will check the other -// fields. -static bool nullFieldOffsetIsZero(MSInheritanceModel Inheritance) { - return Inheritance != MSIM_Multiple && Inheritance != MSIM_Single; -} - bool MicrosoftCXXABI::isZeroInitializable(const MemberPointerType *MPT) { // Null-ness for function memptrs only depends on the first field, which is // the function pointer. The rest don't matter, so we can zero initialize. @@ -1270,28 +1806,28 @@ bool MicrosoftCXXABI::isZeroInitializable(const MemberPointerType *MPT) { // The virtual base adjustment field is always -1 for null, so if we have one // we can't zero initialize. The field offset is sometimes also -1 if 0 is a // valid field offset. 
- const CXXRecordDecl *RD = MPT->getClass()->getAsCXXRecordDecl(); - MSInheritanceModel Inheritance = RD->getMSInheritanceModel(); - return (!hasVirtualBaseAdjustmentField(Inheritance) && - nullFieldOffsetIsZero(Inheritance)); + const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl(); + MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel(); + return (!MSInheritanceAttr::hasVBTableOffsetField(Inheritance) && + RD->nullFieldOffsetIsZero()); } llvm::Type * MicrosoftCXXABI::ConvertMemberPointerType(const MemberPointerType *MPT) { - const CXXRecordDecl *RD = MPT->getClass()->getAsCXXRecordDecl(); - MSInheritanceModel Inheritance = RD->getMSInheritanceModel(); + const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl(); + MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel(); llvm::SmallVector<llvm::Type *, 4> fields; if (MPT->isMemberFunctionPointer()) fields.push_back(CGM.VoidPtrTy); // FunctionPointerOrVirtualThunk else fields.push_back(CGM.IntTy); // FieldOffset - if (hasNonVirtualBaseAdjustmentField(MPT->isMemberFunctionPointer(), - Inheritance)) + if (MSInheritanceAttr::hasNVOffsetField(MPT->isMemberFunctionPointer(), + Inheritance)) fields.push_back(CGM.IntTy); - if (hasVBPtrOffsetField(Inheritance)) + if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance)) fields.push_back(CGM.IntTy); - if (hasVirtualBaseAdjustmentField(Inheritance)) + if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance)) fields.push_back(CGM.IntTy); // VirtualBaseAdjustmentOffset if (fields.size() == 1) @@ -1303,24 +1839,24 @@ void MicrosoftCXXABI:: GetNullMemberPointerFields(const MemberPointerType *MPT, llvm::SmallVectorImpl<llvm::Constant *> &fields) { assert(fields.empty()); - const CXXRecordDecl *RD = MPT->getClass()->getAsCXXRecordDecl(); - MSInheritanceModel Inheritance = RD->getMSInheritanceModel(); + const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl(); + MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel(); if (MPT->isMemberFunctionPointer()) { // FunctionPointerOrVirtualThunk fields.push_back(llvm::Constant::getNullValue(CGM.VoidPtrTy)); } else { - if (nullFieldOffsetIsZero(Inheritance)) + if (RD->nullFieldOffsetIsZero()) fields.push_back(getZeroInt()); // FieldOffset else fields.push_back(getAllOnesInt()); // FieldOffset } - if (hasNonVirtualBaseAdjustmentField(MPT->isMemberFunctionPointer(), - Inheritance)) + if (MSInheritanceAttr::hasNVOffsetField(MPT->isMemberFunctionPointer(), + Inheritance)) fields.push_back(getZeroInt()); - if (hasVBPtrOffsetField(Inheritance)) + if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance)) fields.push_back(getZeroInt()); - if (hasVirtualBaseAdjustmentField(Inheritance)) + if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance)) fields.push_back(getAllOnesInt()); } @@ -1341,29 +1877,29 @@ MicrosoftCXXABI::EmitFullMemberPointer(llvm::Constant *FirstField, const CXXRecordDecl *RD, CharUnits NonVirtualBaseAdjustment) { - MSInheritanceModel Inheritance = RD->getMSInheritanceModel(); + MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel(); // Single inheritance class member pointer are represented as scalars instead // of aggregates. 
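// Illustrative only: how the inheritance model of the most recent class
// declaration selects the member pointer layout built here (shown for member
// function pointers; data member pointers omit the this-adjustment field).
struct Single { void f(); };              // single model: a lone function pointer
struct B1 { int b1; void g(); };
struct B2 { int b2; void h(); };
struct Multi : B1, B2 { };                // multiple model: { fnptr, nv-offset }
struct Virt : virtual B1 { };             // virtual model: { fnptr, nv-offset, vbtable-offset }
struct Fwd;                               // incomplete class: unspecified model
int (Fwd::*unknown)();                    // { fnptr, nv-offset, vbptr-offset, vbtable-offset }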
- if (hasOnlyOneField(IsMemberFunction, Inheritance)) + if (MSInheritanceAttr::hasOnlyOneField(IsMemberFunction, Inheritance)) return FirstField; llvm::SmallVector<llvm::Constant *, 4> fields; fields.push_back(FirstField); - if (hasNonVirtualBaseAdjustmentField(IsMemberFunction, Inheritance)) + if (MSInheritanceAttr::hasNVOffsetField(IsMemberFunction, Inheritance)) fields.push_back(llvm::ConstantInt::get( CGM.IntTy, NonVirtualBaseAdjustment.getQuantity())); - if (hasVBPtrOffsetField(Inheritance)) { + if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance)) { CharUnits Offs = CharUnits::Zero(); if (RD->getNumVBases()) - Offs = GetVBPtrOffsetFromBases(RD); + Offs = getContext().getASTRecordLayout(RD).getVBPtrOffset(); fields.push_back(llvm::ConstantInt::get(CGM.IntTy, Offs.getQuantity())); } // The rest of the fields are adjusted by conversions to a more derived class. - if (hasVirtualBaseAdjustmentField(Inheritance)) + if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance)) fields.push_back(getZeroInt()); return llvm::ConstantStruct::getAnon(fields); @@ -1372,7 +1908,7 @@ MicrosoftCXXABI::EmitFullMemberPointer(llvm::Constant *FirstField, llvm::Constant * MicrosoftCXXABI::EmitMemberDataPointer(const MemberPointerType *MPT, CharUnits offset) { - const CXXRecordDecl *RD = MPT->getClass()->getAsCXXRecordDecl(); + const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl(); llvm::Constant *FirstField = llvm::ConstantInt::get(CGM.IntTy, offset.getQuantity()); return EmitFullMemberPointer(FirstField, /*IsMemberFunction=*/false, RD, @@ -1395,8 +1931,8 @@ llvm::Constant *MicrosoftCXXABI::EmitMemberPointer(const APValue &MP, // FIXME PR15713: Support virtual inheritance paths. if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(MPD)) - return BuildMemberPointer(MPT->getClass()->getAsCXXRecordDecl(), - MD, ThisAdjustment); + return BuildMemberPointer(MPT->getMostRecentCXXRecordDecl(), MD, + ThisAdjustment); CharUnits FieldOffset = getContext().toCharUnitsFromBits(getContext().getFieldOffset(MPD)); @@ -1409,6 +1945,7 @@ MicrosoftCXXABI::BuildMemberPointer(const CXXRecordDecl *RD, CharUnits NonVirtualBaseAdjustment) { assert(MD->isInstance() && "Member function must not be static!"); MD = MD->getCanonicalDecl(); + RD = RD->getMostRecentDecl(); CodeGenTypes &Types = CGM.getTypes(); llvm::Constant *FirstField; @@ -1442,16 +1979,10 @@ MicrosoftCXXABI::BuildMemberPointer(const CXXRecordDecl *RD, "member function in virtual base class"); FirstField = llvm::Constant::getNullValue(CGM.VoidPtrTy); } else { - SmallString<256> ThunkName; - CharUnits PointerWidth = getContext().toCharUnitsFromBits( - getContext().getTargetInfo().getPointerWidth(0)); - uint64_t OffsetInVFTable = ML.Index * PointerWidth.getQuantity(); - llvm::raw_svector_ostream Out(ThunkName); - getMangleContext().mangleVirtualMemPtrThunk(MD, OffsetInVFTable, Out); - Out.flush(); - - llvm::Function *Thunk = EmitVirtualMemPtrThunk(MD, ThunkName.str()); + llvm::Function *Thunk = EmitVirtualMemPtrThunk(MD, ML); FirstField = llvm::ConstantExpr::getBitCast(Thunk, CGM.VoidPtrTy); + // Include the vfptr adjustment if the method is in a non-primary vftable. + NonVirtualBaseAdjustment += ML.VFPtrOffset; } } @@ -1486,9 +2017,10 @@ MicrosoftCXXABI::EmitMemberPointerComparison(CodeGenFunction &CGF, // If this is a single field member pointer (single inheritance), this is a // single icmp. 
- const CXXRecordDecl *RD = MPT->getClass()->getAsCXXRecordDecl(); - MSInheritanceModel Inheritance = RD->getMSInheritanceModel(); - if (hasOnlyOneField(MPT->isMemberFunctionPointer(), Inheritance)) + const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl(); + MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel(); + if (MSInheritanceAttr::hasOnlyOneField(MPT->isMemberFunctionPointer(), + Inheritance)) return Builder.CreateICmp(Eq, L, R); // Compare the first field. @@ -1497,7 +2029,7 @@ MicrosoftCXXABI::EmitMemberPointerComparison(CodeGenFunction &CGF, llvm::Value *Cmp0 = Builder.CreateICmp(Eq, L0, R0, "memptr.cmp.first"); // Compare everything other than the first field. - llvm::Value *Res = 0; + llvm::Value *Res = nullptr; llvm::StructType *LType = cast<llvm::StructType>(L->getType()); for (unsigned I = 1, E = LType->getNumElements(); I != E; ++I) { llvm::Value *LF = Builder.CreateExtractValue(L, I); @@ -1548,7 +2080,7 @@ MicrosoftCXXABI::EmitMemberPointerIsNotNull(CodeGenFunction &CGF, for (int I = 1, E = fields.size(); I < E; ++I) { llvm::Value *Field = Builder.CreateExtractValue(MemPtr, I); llvm::Value *Next = Builder.CreateICmpNE(Field, fields[I], "memptr.cmp"); - Res = Builder.CreateAnd(Res, Next, "memptr.tobool"); + Res = Builder.CreateOr(Res, Next, "memptr.tobool"); } return Res; } @@ -1607,16 +2139,14 @@ MicrosoftCXXABI::GetVBaseOffsetFromVBPtr(CodeGenFunction &CGF, // Returns an adjusted base cast to i8*, since we do more address arithmetic on // it. -llvm::Value * -MicrosoftCXXABI::AdjustVirtualBase(CodeGenFunction &CGF, - const CXXRecordDecl *RD, llvm::Value *Base, - llvm::Value *VBTableOffset, - llvm::Value *VBPtrOffset) { +llvm::Value *MicrosoftCXXABI::AdjustVirtualBase( + CodeGenFunction &CGF, const Expr *E, const CXXRecordDecl *RD, + llvm::Value *Base, llvm::Value *VBTableOffset, llvm::Value *VBPtrOffset) { CGBuilderTy &Builder = CGF.Builder; Base = Builder.CreateBitCast(Base, CGM.Int8PtrTy); - llvm::BasicBlock *OriginalBB = 0; - llvm::BasicBlock *SkipAdjustBB = 0; - llvm::BasicBlock *VBaseAdjustBB = 0; + llvm::BasicBlock *OriginalBB = nullptr; + llvm::BasicBlock *SkipAdjustBB = nullptr; + llvm::BasicBlock *VBaseAdjustBB = nullptr; // In the unspecified inheritance model, there might not be a vbtable at all, // in which case we need to skip the virtual base lookup. If there is a @@ -1637,12 +2167,18 @@ MicrosoftCXXABI::AdjustVirtualBase(CodeGenFunction &CGF, // know the vbptr offset. 
if (!VBPtrOffset) { CharUnits offs = CharUnits::Zero(); - if (RD->getNumVBases()) { - offs = GetVBPtrOffsetFromBases(RD); - } + if (!RD->hasDefinition()) { + DiagnosticsEngine &Diags = CGF.CGM.getDiags(); + unsigned DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Error, + "member pointer representation requires a " + "complete class type for %0 to perform this expression"); + Diags.Report(E->getExprLoc(), DiagID) << RD << E->getSourceRange(); + } else if (RD->getNumVBases()) + offs = getContext().getASTRecordLayout(RD).getVBPtrOffset(); VBPtrOffset = llvm::ConstantInt::get(CGM.IntTy, offs.getQuantity()); } - llvm::Value *VBPtr = 0; + llvm::Value *VBPtr = nullptr; llvm::Value *VBaseOffs = GetVBaseOffsetFromVBPtr(CGF, Base, VBPtrOffset, VBTableOffset, &VBPtr); llvm::Value *AdjustedBase = Builder.CreateInBoundsGEP(VBPtr, VBaseOffs); @@ -1659,38 +2195,41 @@ MicrosoftCXXABI::AdjustVirtualBase(CodeGenFunction &CGF, return AdjustedBase; } -llvm::Value * -MicrosoftCXXABI::EmitMemberDataPointerAddress(CodeGenFunction &CGF, - llvm::Value *Base, - llvm::Value *MemPtr, - const MemberPointerType *MPT) { +llvm::Value *MicrosoftCXXABI::EmitMemberDataPointerAddress( + CodeGenFunction &CGF, const Expr *E, llvm::Value *Base, llvm::Value *MemPtr, + const MemberPointerType *MPT) { assert(MPT->isMemberDataPointer()); unsigned AS = Base->getType()->getPointerAddressSpace(); llvm::Type *PType = CGF.ConvertTypeForMem(MPT->getPointeeType())->getPointerTo(AS); CGBuilderTy &Builder = CGF.Builder; - const CXXRecordDecl *RD = MPT->getClass()->getAsCXXRecordDecl(); - MSInheritanceModel Inheritance = RD->getMSInheritanceModel(); + const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl(); + MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel(); // Extract the fields we need, regardless of model. We'll apply them if we // have them. llvm::Value *FieldOffset = MemPtr; - llvm::Value *VirtualBaseAdjustmentOffset = 0; - llvm::Value *VBPtrOffset = 0; + llvm::Value *VirtualBaseAdjustmentOffset = nullptr; + llvm::Value *VBPtrOffset = nullptr; if (MemPtr->getType()->isStructTy()) { // We need to extract values. unsigned I = 0; FieldOffset = Builder.CreateExtractValue(MemPtr, I++); - if (hasVBPtrOffsetField(Inheritance)) + if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance)) VBPtrOffset = Builder.CreateExtractValue(MemPtr, I++); - if (hasVirtualBaseAdjustmentField(Inheritance)) + if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance)) VirtualBaseAdjustmentOffset = Builder.CreateExtractValue(MemPtr, I++); } if (VirtualBaseAdjustmentOffset) { - Base = AdjustVirtualBase(CGF, RD, Base, VirtualBaseAdjustmentOffset, + Base = AdjustVirtualBase(CGF, E, RD, Base, VirtualBaseAdjustmentOffset, VBPtrOffset); } + + // Cast to char*. + Base = Builder.CreateBitCast(Base, Builder.getInt8Ty()->getPointerTo(AS)); + + // Apply the offset, which we assume is non-null. 
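// Illustrative only: a data member pointer whose class uses the virtual
// inheritance model. The pointer is lowered to { FieldOffset, VBTableOffset },
// so dereferencing it goes through the AdjustVirtualBase path above before
// the field offset is applied.
struct VBase { int x; };
struct Holder : virtual VBase { int y; };
int Holder::*pm = &Holder::y;
int readField(Holder &h) { return h.*pm; }  // loads the vbase displacement via the vbptr, then applies FieldOffset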
llvm::Value *Addr = Builder.CreateInBoundsGEP(Base, FieldOffset, "memptr.offset"); @@ -1699,9 +2238,9 @@ MicrosoftCXXABI::EmitMemberDataPointerAddress(CodeGenFunction &CGF, return Builder.CreateBitCast(Addr, PType); } -static MSInheritanceModel +static MSInheritanceAttr::Spelling getInheritanceFromMemptr(const MemberPointerType *MPT) { - return MPT->getClass()->getAsCXXRecordDecl()->getMSInheritanceModel(); + return MPT->getMostRecentCXXRecordDecl()->getMSInheritanceModel(); } llvm::Value * @@ -1721,15 +2260,17 @@ MicrosoftCXXABI::EmitMemberPointerConversion(CodeGenFunction &CGF, const MemberPointerType *SrcTy = E->getSubExpr()->getType()->castAs<MemberPointerType>(); const MemberPointerType *DstTy = E->getType()->castAs<MemberPointerType>(); - MSInheritanceModel SrcInheritance = getInheritanceFromMemptr(SrcTy); - MSInheritanceModel DstInheritance = getInheritanceFromMemptr(DstTy); bool IsFunc = SrcTy->isMemberFunctionPointer(); // If the classes use the same null representation, reinterpret_cast is a nop. bool IsReinterpret = E->getCastKind() == CK_ReinterpretMemberPointer; - if (IsReinterpret && (IsFunc || - nullFieldOffsetIsZero(SrcInheritance) == - nullFieldOffsetIsZero(DstInheritance))) + if (IsReinterpret && IsFunc) + return Src; + + CXXRecordDecl *SrcRD = SrcTy->getMostRecentCXXRecordDecl(); + CXXRecordDecl *DstRD = DstTy->getMostRecentCXXRecordDecl(); + if (IsReinterpret && + SrcRD->nullFieldOffsetIsZero() == DstRD->nullFieldOffsetIsZero()) return Src; CGBuilderTy &Builder = CGF.Builder; @@ -1755,18 +2296,19 @@ MicrosoftCXXABI::EmitMemberPointerConversion(CodeGenFunction &CGF, // Decompose src. llvm::Value *FirstField = Src; - llvm::Value *NonVirtualBaseAdjustment = 0; - llvm::Value *VirtualBaseAdjustmentOffset = 0; - llvm::Value *VBPtrOffset = 0; - if (!hasOnlyOneField(IsFunc, SrcInheritance)) { + llvm::Value *NonVirtualBaseAdjustment = nullptr; + llvm::Value *VirtualBaseAdjustmentOffset = nullptr; + llvm::Value *VBPtrOffset = nullptr; + MSInheritanceAttr::Spelling SrcInheritance = SrcRD->getMSInheritanceModel(); + if (!MSInheritanceAttr::hasOnlyOneField(IsFunc, SrcInheritance)) { // We need to extract values. unsigned I = 0; FirstField = Builder.CreateExtractValue(Src, I++); - if (hasNonVirtualBaseAdjustmentField(IsFunc, SrcInheritance)) + if (MSInheritanceAttr::hasNVOffsetField(IsFunc, SrcInheritance)) NonVirtualBaseAdjustment = Builder.CreateExtractValue(Src, I++); - if (hasVBPtrOffsetField(SrcInheritance)) + if (MSInheritanceAttr::hasVBPtrOffsetField(SrcInheritance)) VBPtrOffset = Builder.CreateExtractValue(Src, I++); - if (hasVirtualBaseAdjustmentField(SrcInheritance)) + if (MSInheritanceAttr::hasVBTableOffsetField(SrcInheritance)) VirtualBaseAdjustmentOffset = Builder.CreateExtractValue(Src, I++); } @@ -1788,20 +2330,21 @@ MicrosoftCXXABI::EmitMemberPointerConversion(CodeGenFunction &CGF, // FIXME PR15713: Support conversions through virtually derived classes. // Recompose dst from the null struct and the adjusted fields from src. 
+ MSInheritanceAttr::Spelling DstInheritance = DstRD->getMSInheritanceModel(); llvm::Value *Dst; - if (hasOnlyOneField(IsFunc, DstInheritance)) { + if (MSInheritanceAttr::hasOnlyOneField(IsFunc, DstInheritance)) { Dst = FirstField; } else { Dst = llvm::UndefValue::get(DstNull->getType()); unsigned Idx = 0; Dst = Builder.CreateInsertValue(Dst, FirstField, Idx++); - if (hasNonVirtualBaseAdjustmentField(IsFunc, DstInheritance)) + if (MSInheritanceAttr::hasNVOffsetField(IsFunc, DstInheritance)) Dst = Builder.CreateInsertValue( Dst, getValueOrZeroInt(NonVirtualBaseAdjustment), Idx++); - if (hasVBPtrOffsetField(DstInheritance)) + if (MSInheritanceAttr::hasVBPtrOffsetField(DstInheritance)) Dst = Builder.CreateInsertValue( Dst, getValueOrZeroInt(VBPtrOffset), Idx++); - if (hasVirtualBaseAdjustmentField(DstInheritance)) + if (MSInheritanceAttr::hasVBTableOffsetField(DstInheritance)) Dst = Builder.CreateInsertValue( Dst, getValueOrZeroInt(VirtualBaseAdjustmentOffset), Idx++); } @@ -1833,24 +2376,24 @@ MicrosoftCXXABI::EmitMemberPointerConversion(const CastExpr *E, if (E->getCastKind() == CK_ReinterpretMemberPointer) return Src; - MSInheritanceModel SrcInheritance = getInheritanceFromMemptr(SrcTy); - MSInheritanceModel DstInheritance = getInheritanceFromMemptr(DstTy); + MSInheritanceAttr::Spelling SrcInheritance = getInheritanceFromMemptr(SrcTy); + MSInheritanceAttr::Spelling DstInheritance = getInheritanceFromMemptr(DstTy); // Decompose src. llvm::Constant *FirstField = Src; - llvm::Constant *NonVirtualBaseAdjustment = 0; - llvm::Constant *VirtualBaseAdjustmentOffset = 0; - llvm::Constant *VBPtrOffset = 0; + llvm::Constant *NonVirtualBaseAdjustment = nullptr; + llvm::Constant *VirtualBaseAdjustmentOffset = nullptr; + llvm::Constant *VBPtrOffset = nullptr; bool IsFunc = SrcTy->isMemberFunctionPointer(); - if (!hasOnlyOneField(IsFunc, SrcInheritance)) { + if (!MSInheritanceAttr::hasOnlyOneField(IsFunc, SrcInheritance)) { // We need to extract values. unsigned I = 0; FirstField = Src->getAggregateElement(I++); - if (hasNonVirtualBaseAdjustmentField(IsFunc, SrcInheritance)) + if (MSInheritanceAttr::hasNVOffsetField(IsFunc, SrcInheritance)) NonVirtualBaseAdjustment = Src->getAggregateElement(I++); - if (hasVBPtrOffsetField(SrcInheritance)) + if (MSInheritanceAttr::hasVBPtrOffsetField(SrcInheritance)) VBPtrOffset = Src->getAggregateElement(I++); - if (hasVirtualBaseAdjustmentField(SrcInheritance)) + if (MSInheritanceAttr::hasVBTableOffsetField(SrcInheritance)) VirtualBaseAdjustmentOffset = Src->getAggregateElement(I++); } @@ -1873,56 +2416,54 @@ MicrosoftCXXABI::EmitMemberPointerConversion(const CastExpr *E, // FIXME PR15713: Support conversions through virtually derived classes. // Recompose dst from the null struct and the adjusted fields from src. 
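// Illustrative only: a member pointer conversion from a base class to a
// derived class that changes representation, which is what the
// decompose/recompose logic here handles (types are hypothetical).
struct B1 { int a; void f(); };
struct B2 { int b; void g(); };
struct Derived : B1, B2 { };
void (B2::*pb)() = &B2::g;        // single model: a lone function pointer
void (Derived::*pd)() = pb;       // multiple model: { fnptr, nv-offset of the B2 subobject within Derived }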
- if (hasOnlyOneField(IsFunc, DstInheritance)) + if (MSInheritanceAttr::hasOnlyOneField(IsFunc, DstInheritance)) return FirstField; llvm::SmallVector<llvm::Constant *, 4> Fields; Fields.push_back(FirstField); - if (hasNonVirtualBaseAdjustmentField(IsFunc, DstInheritance)) + if (MSInheritanceAttr::hasNVOffsetField(IsFunc, DstInheritance)) Fields.push_back(getConstantOrZeroInt(NonVirtualBaseAdjustment)); - if (hasVBPtrOffsetField(DstInheritance)) + if (MSInheritanceAttr::hasVBPtrOffsetField(DstInheritance)) Fields.push_back(getConstantOrZeroInt(VBPtrOffset)); - if (hasVirtualBaseAdjustmentField(DstInheritance)) + if (MSInheritanceAttr::hasVBTableOffsetField(DstInheritance)) Fields.push_back(getConstantOrZeroInt(VirtualBaseAdjustmentOffset)); return llvm::ConstantStruct::getAnon(Fields); } -llvm::Value * -MicrosoftCXXABI::EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF, - llvm::Value *&This, - llvm::Value *MemPtr, - const MemberPointerType *MPT) { +llvm::Value *MicrosoftCXXABI::EmitLoadOfMemberFunctionPointer( + CodeGenFunction &CGF, const Expr *E, llvm::Value *&This, + llvm::Value *MemPtr, const MemberPointerType *MPT) { assert(MPT->isMemberFunctionPointer()); const FunctionProtoType *FPT = MPT->getPointeeType()->castAs<FunctionProtoType>(); - const CXXRecordDecl *RD = MPT->getClass()->getAsCXXRecordDecl(); + const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl(); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType( CGM.getTypes().arrangeCXXMethodType(RD, FPT)); CGBuilderTy &Builder = CGF.Builder; - MSInheritanceModel Inheritance = RD->getMSInheritanceModel(); + MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel(); // Extract the fields we need, regardless of model. We'll apply them if we // have them. llvm::Value *FunctionPointer = MemPtr; - llvm::Value *NonVirtualBaseAdjustment = NULL; - llvm::Value *VirtualBaseAdjustmentOffset = NULL; - llvm::Value *VBPtrOffset = NULL; + llvm::Value *NonVirtualBaseAdjustment = nullptr; + llvm::Value *VirtualBaseAdjustmentOffset = nullptr; + llvm::Value *VBPtrOffset = nullptr; if (MemPtr->getType()->isStructTy()) { // We need to extract values. unsigned I = 0; FunctionPointer = Builder.CreateExtractValue(MemPtr, I++); - if (hasNonVirtualBaseAdjustmentField(MPT, Inheritance)) + if (MSInheritanceAttr::hasNVOffsetField(MPT, Inheritance)) NonVirtualBaseAdjustment = Builder.CreateExtractValue(MemPtr, I++); - if (hasVBPtrOffsetField(Inheritance)) + if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance)) VBPtrOffset = Builder.CreateExtractValue(MemPtr, I++); - if (hasVirtualBaseAdjustmentField(Inheritance)) + if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance)) VirtualBaseAdjustmentOffset = Builder.CreateExtractValue(MemPtr, I++); } if (VirtualBaseAdjustmentOffset) { - This = AdjustVirtualBase(CGF, RD, This, VirtualBaseAdjustmentOffset, + This = AdjustVirtualBase(CGF, E, RD, This, VirtualBaseAdjustmentOffset, VBPtrOffset); } @@ -1939,3 +2480,398 @@ MicrosoftCXXABI::EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF, CGCXXABI *clang::CodeGen::CreateMicrosoftCXXABI(CodeGenModule &CGM) { return new MicrosoftCXXABI(CGM); } + +// MS RTTI Overview: +// The run time type information emitted by cl.exe contains 5 distinct types of +// structures. Many of them reference each other. +// +// TypeInfo: Static classes that are returned by typeid. +// +// CompleteObjectLocator: Referenced by vftables. They contain information +// required for dynamic casting, including OffsetFromTop. 
They also contain +// a reference to the TypeInfo for the type and a reference to the +// CompleteHierarchyDescriptor for the type. +// +// ClassHieararchyDescriptor: Contains information about a class hierarchy. +// Used during dynamic_cast to walk a class hierarchy. References a base +// class array and the size of said array. +// +// BaseClassArray: Contains a list of classes in a hierarchy. BaseClassArray is +// somewhat of a misnomer because the most derived class is also in the list +// as well as multiple copies of virtual bases (if they occur multiple times +// in the hiearchy.) The BaseClassArray contains one BaseClassDescriptor for +// every path in the hierarchy, in pre-order depth first order. Note, we do +// not declare a specific llvm type for BaseClassArray, it's merely an array +// of BaseClassDescriptor pointers. +// +// BaseClassDescriptor: Contains information about a class in a class hierarchy. +// BaseClassDescriptor is also somewhat of a misnomer for the same reason that +// BaseClassArray is. It contains information about a class within a +// hierarchy such as: is this base is ambiguous and what is its offset in the +// vbtable. The names of the BaseClassDescriptors have all of their fields +// mangled into them so they can be aggressively deduplicated by the linker. + +static llvm::GlobalVariable *getTypeInfoVTable(CodeGenModule &CGM) { + StringRef MangledName("\01??_7type_info@@6B@"); + if (auto VTable = CGM.getModule().getNamedGlobal(MangledName)) + return VTable; + return new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy, + /*Constant=*/true, + llvm::GlobalVariable::ExternalLinkage, + /*Initializer=*/nullptr, MangledName); +} + +namespace { + +/// \brief A Helper struct that stores information about a class in a class +/// hierarchy. The information stored in these structs struct is used during +/// the generation of ClassHierarchyDescriptors and BaseClassDescriptors. +// During RTTI creation, MSRTTIClasses are stored in a contiguous array with +// implicit depth first pre-order tree connectivity. getFirstChild and +// getNextSibling allow us to walk the tree efficiently. +struct MSRTTIClass { + enum { + IsPrivateOnPath = 1 | 8, + IsAmbiguous = 2, + IsPrivate = 4, + IsVirtual = 16, + HasHierarchyDescriptor = 64 + }; + MSRTTIClass(const CXXRecordDecl *RD) : RD(RD) {} + uint32_t initialize(const MSRTTIClass *Parent, + const CXXBaseSpecifier *Specifier); + + MSRTTIClass *getFirstChild() { return this + 1; } + static MSRTTIClass *getNextChild(MSRTTIClass *Child) { + return Child + 1 + Child->NumBases; + } + + const CXXRecordDecl *RD, *VirtualRoot; + uint32_t Flags, NumBases, OffsetInVBase; +}; + +/// \brief Recursively initialize the base class array. 
+uint32_t MSRTTIClass::initialize(const MSRTTIClass *Parent, + const CXXBaseSpecifier *Specifier) { + Flags = HasHierarchyDescriptor; + if (!Parent) { + VirtualRoot = nullptr; + OffsetInVBase = 0; + } else { + if (Specifier->getAccessSpecifier() != AS_public) + Flags |= IsPrivate | IsPrivateOnPath; + if (Specifier->isVirtual()) { + Flags |= IsVirtual; + VirtualRoot = RD; + OffsetInVBase = 0; + } else { + if (Parent->Flags & IsPrivateOnPath) + Flags |= IsPrivateOnPath; + VirtualRoot = Parent->VirtualRoot; + OffsetInVBase = Parent->OffsetInVBase + RD->getASTContext() + .getASTRecordLayout(Parent->RD).getBaseClassOffset(RD).getQuantity(); + } + } + NumBases = 0; + MSRTTIClass *Child = getFirstChild(); + for (const CXXBaseSpecifier &Base : RD->bases()) { + NumBases += Child->initialize(this, &Base) + 1; + Child = getNextChild(Child); + } + return NumBases; +} + +static llvm::GlobalValue::LinkageTypes getLinkageForRTTI(QualType Ty) { + switch (Ty->getLinkage()) { + case NoLinkage: + case InternalLinkage: + case UniqueExternalLinkage: + return llvm::GlobalValue::InternalLinkage; + + case VisibleNoLinkage: + case ExternalLinkage: + return llvm::GlobalValue::LinkOnceODRLinkage; + } + llvm_unreachable("Invalid linkage!"); +} + +/// \brief An ephemeral helper class for building MS RTTI types. It caches some +/// calls to the module and information about the most derived class in a +/// hierarchy. +struct MSRTTIBuilder { + enum { + HasBranchingHierarchy = 1, + HasVirtualBranchingHierarchy = 2, + HasAmbiguousBases = 4 + }; + + MSRTTIBuilder(MicrosoftCXXABI &ABI, const CXXRecordDecl *RD) + : CGM(ABI.CGM), Context(CGM.getContext()), + VMContext(CGM.getLLVMContext()), Module(CGM.getModule()), RD(RD), + Linkage(getLinkageForRTTI(CGM.getContext().getTagDeclType(RD))), + ABI(ABI) {} + + llvm::GlobalVariable *getBaseClassDescriptor(const MSRTTIClass &Classes); + llvm::GlobalVariable * + getBaseClassArray(SmallVectorImpl<MSRTTIClass> &Classes); + llvm::GlobalVariable *getClassHierarchyDescriptor(); + llvm::GlobalVariable *getCompleteObjectLocator(const VPtrInfo *Info); + + CodeGenModule &CGM; + ASTContext &Context; + llvm::LLVMContext &VMContext; + llvm::Module &Module; + const CXXRecordDecl *RD; + llvm::GlobalVariable::LinkageTypes Linkage; + MicrosoftCXXABI &ABI; +}; + +} // namespace + +/// \brief Recursively serializes a class hierarchy in pre-order depth first +/// order. +static void serializeClassHierarchy(SmallVectorImpl<MSRTTIClass> &Classes, + const CXXRecordDecl *RD) { + Classes.push_back(MSRTTIClass(RD)); + for (const CXXBaseSpecifier &Base : RD->bases()) + serializeClassHierarchy(Classes, Base.getType()->getAsCXXRecordDecl()); +} + +/// \brief Find ambiguity among base classes. 
+static void +detectAmbiguousBases(SmallVectorImpl<MSRTTIClass> &Classes) { + llvm::SmallPtrSet<const CXXRecordDecl *, 8> VirtualBases; + llvm::SmallPtrSet<const CXXRecordDecl *, 8> UniqueBases; + llvm::SmallPtrSet<const CXXRecordDecl *, 8> AmbiguousBases; + for (MSRTTIClass *Class = &Classes.front(); Class <= &Classes.back();) { + if ((Class->Flags & MSRTTIClass::IsVirtual) && + !VirtualBases.insert(Class->RD)) { + Class = MSRTTIClass::getNextChild(Class); + continue; + } + if (!UniqueBases.insert(Class->RD)) + AmbiguousBases.insert(Class->RD); + Class++; + } + if (AmbiguousBases.empty()) + return; + for (MSRTTIClass &Class : Classes) + if (AmbiguousBases.count(Class.RD)) + Class.Flags |= MSRTTIClass::IsAmbiguous; +} + +llvm::GlobalVariable *MSRTTIBuilder::getClassHierarchyDescriptor() { + SmallString<256> MangledName; + { + llvm::raw_svector_ostream Out(MangledName); + ABI.getMangleContext().mangleCXXRTTIClassHierarchyDescriptor(RD, Out); + } + + // Check to see if we've already declared this ClassHierarchyDescriptor. + if (auto CHD = Module.getNamedGlobal(MangledName)) + return CHD; + + // Serialize the class hierarchy and initialize the CHD Fields. + SmallVector<MSRTTIClass, 8> Classes; + serializeClassHierarchy(Classes, RD); + Classes.front().initialize(/*Parent=*/nullptr, /*Specifier=*/nullptr); + detectAmbiguousBases(Classes); + int Flags = 0; + for (auto Class : Classes) { + if (Class.RD->getNumBases() > 1) + Flags |= HasBranchingHierarchy; + // Note: cl.exe does not calculate "HasAmbiguousBases" correctly. We + // believe the field isn't actually used. + if (Class.Flags & MSRTTIClass::IsAmbiguous) + Flags |= HasAmbiguousBases; + } + if ((Flags & HasBranchingHierarchy) && RD->getNumVBases() != 0) + Flags |= HasVirtualBranchingHierarchy; + // These gep indices are used to get the address of the first element of the + // base class array. + llvm::Value *GEPIndices[] = {llvm::ConstantInt::get(CGM.IntTy, 0), + llvm::ConstantInt::get(CGM.IntTy, 0)}; + + // Forward-declare the class hierarchy descriptor + auto Type = ABI.getClassHierarchyDescriptorType(); + auto CHD = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage, + /*Initializer=*/nullptr, + MangledName.c_str()); + + // Initialize the base class ClassHierarchyDescriptor. + llvm::Constant *Fields[] = { + llvm::ConstantInt::get(CGM.IntTy, 0), // Unknown + llvm::ConstantInt::get(CGM.IntTy, Flags), + llvm::ConstantInt::get(CGM.IntTy, Classes.size()), + ABI.getImageRelativeConstant(llvm::ConstantExpr::getInBoundsGetElementPtr( + getBaseClassArray(Classes), + llvm::ArrayRef<llvm::Value *>(GEPIndices))), + }; + CHD->setInitializer(llvm::ConstantStruct::get(Type, Fields)); + return CHD; +} + +llvm::GlobalVariable * +MSRTTIBuilder::getBaseClassArray(SmallVectorImpl<MSRTTIClass> &Classes) { + SmallString<256> MangledName; + { + llvm::raw_svector_ostream Out(MangledName); + ABI.getMangleContext().mangleCXXRTTIBaseClassArray(RD, Out); + } + + // Forward-declare the base class array. + // cl.exe pads the base class array with 1 (in 32 bit mode) or 4 (in 64 bit + // mode) bytes of padding. We provide a pointer sized amount of padding by + // adding +1 to Classes.size(). The sections have pointer alignment and are + // marked pick-any so it shouldn't matter. 
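// Illustrative only (hypothetical hierarchy): the pre-order, depth-first
// serialization produced by serializeClassHierarchy(), which is what the
// BaseClassArray and ClassHierarchyDescriptor ultimately describe.
struct A { virtual ~A(); };
struct B : A { };
struct C : A { };
struct D : B, C { };
// Serialized classes for D: D, B, A (via B), C, A (via C) -- five entries, so
// D's BaseClassArray holds five BaseClassDescriptor pointers plus one null
// entry of pointer-sized padding, as the comment above notes. Both copies of
// A are then flagged IsAmbiguous by detectAmbiguousBases().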
+ llvm::Type *PtrType = ABI.getImageRelativeType( + ABI.getBaseClassDescriptorType()->getPointerTo()); + auto *ArrType = llvm::ArrayType::get(PtrType, Classes.size() + 1); + auto *BCA = new llvm::GlobalVariable( + Module, ArrType, + /*Constant=*/true, Linkage, /*Initializer=*/nullptr, MangledName.c_str()); + + // Initialize the BaseClassArray. + SmallVector<llvm::Constant *, 8> BaseClassArrayData; + for (MSRTTIClass &Class : Classes) + BaseClassArrayData.push_back( + ABI.getImageRelativeConstant(getBaseClassDescriptor(Class))); + BaseClassArrayData.push_back(llvm::Constant::getNullValue(PtrType)); + BCA->setInitializer(llvm::ConstantArray::get(ArrType, BaseClassArrayData)); + return BCA; +} + +llvm::GlobalVariable * +MSRTTIBuilder::getBaseClassDescriptor(const MSRTTIClass &Class) { + // Compute the fields for the BaseClassDescriptor. They are computed up front + // because they are mangled into the name of the object. + uint32_t OffsetInVBTable = 0; + int32_t VBPtrOffset = -1; + if (Class.VirtualRoot) { + auto &VTableContext = CGM.getMicrosoftVTableContext(); + OffsetInVBTable = VTableContext.getVBTableIndex(RD, Class.VirtualRoot) * 4; + VBPtrOffset = Context.getASTRecordLayout(RD).getVBPtrOffset().getQuantity(); + } + + SmallString<256> MangledName; + { + llvm::raw_svector_ostream Out(MangledName); + ABI.getMangleContext().mangleCXXRTTIBaseClassDescriptor( + Class.RD, Class.OffsetInVBase, VBPtrOffset, OffsetInVBTable, + Class.Flags, Out); + } + + // Check to see if we've already declared this object. + if (auto BCD = Module.getNamedGlobal(MangledName)) + return BCD; + + // Forward-declare the base class descriptor. + auto Type = ABI.getBaseClassDescriptorType(); + auto BCD = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage, + /*Initializer=*/nullptr, + MangledName.c_str()); + + // Initialize the BaseClassDescriptor. + llvm::Constant *Fields[] = { + ABI.getImageRelativeConstant( + ABI.getAddrOfRTTIDescriptor(Context.getTypeDeclType(Class.RD))), + llvm::ConstantInt::get(CGM.IntTy, Class.NumBases), + llvm::ConstantInt::get(CGM.IntTy, Class.OffsetInVBase), + llvm::ConstantInt::get(CGM.IntTy, VBPtrOffset), + llvm::ConstantInt::get(CGM.IntTy, OffsetInVBTable), + llvm::ConstantInt::get(CGM.IntTy, Class.Flags), + ABI.getImageRelativeConstant( + MSRTTIBuilder(ABI, Class.RD).getClassHierarchyDescriptor()), + }; + BCD->setInitializer(llvm::ConstantStruct::get(Type, Fields)); + return BCD; +} + +llvm::GlobalVariable * +MSRTTIBuilder::getCompleteObjectLocator(const VPtrInfo *Info) { + SmallString<256> MangledName; + { + llvm::raw_svector_ostream Out(MangledName); + ABI.getMangleContext().mangleCXXRTTICompleteObjectLocator(RD, Info->MangledPath, Out); + } + + // Check to see if we've already computed this complete object locator. + if (auto COL = Module.getNamedGlobal(MangledName)) + return COL; + + // Compute the fields of the complete object locator. + int OffsetToTop = Info->FullOffsetInMDC.getQuantity(); + int VFPtrOffset = 0; + // The offset includes the vtordisp if one exists. + if (const CXXRecordDecl *VBase = Info->getVBaseWithVPtr()) + if (Context.getASTRecordLayout(RD) + .getVBaseOffsetsMap() + .find(VBase) + ->second.hasVtorDisp()) + VFPtrOffset = Info->NonVirtualOffset.getQuantity() + 4; + + // Forward-declare the complete object locator. + llvm::StructType *Type = ABI.getCompleteObjectLocatorType(); + auto COL = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage, + /*Initializer=*/nullptr, MangledName.c_str()); + + // Initialize the CompleteObjectLocator. 
+ llvm::Constant *Fields[] = { + llvm::ConstantInt::get(CGM.IntTy, ABI.isImageRelative()), + llvm::ConstantInt::get(CGM.IntTy, OffsetToTop), + llvm::ConstantInt::get(CGM.IntTy, VFPtrOffset), + ABI.getImageRelativeConstant( + CGM.GetAddrOfRTTIDescriptor(Context.getTypeDeclType(RD))), + ABI.getImageRelativeConstant(getClassHierarchyDescriptor()), + ABI.getImageRelativeConstant(COL), + }; + llvm::ArrayRef<llvm::Constant *> FieldsRef(Fields); + if (!ABI.isImageRelative()) + FieldsRef = FieldsRef.drop_back(); + COL->setInitializer(llvm::ConstantStruct::get(Type, FieldsRef)); + return COL; +} + +/// \brief Gets a TypeDescriptor. Returns a llvm::Constant * rather than a +/// llvm::GlobalVariable * because different type descriptors have different +/// types, and need to be abstracted. They are abstracting by casting the +/// address to an Int8PtrTy. +llvm::Constant *MicrosoftCXXABI::getAddrOfRTTIDescriptor(QualType Type) { + SmallString<256> MangledName, TypeInfoString; + { + llvm::raw_svector_ostream Out(MangledName); + getMangleContext().mangleCXXRTTI(Type, Out); + } + + // Check to see if we've already declared this TypeDescriptor. + if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(MangledName)) + return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy); + + // Compute the fields for the TypeDescriptor. + { + llvm::raw_svector_ostream Out(TypeInfoString); + getMangleContext().mangleCXXRTTIName(Type, Out); + } + + // Declare and initialize the TypeDescriptor. + llvm::Constant *Fields[] = { + getTypeInfoVTable(CGM), // VFPtr + llvm::ConstantPointerNull::get(CGM.Int8PtrTy), // Runtime data + llvm::ConstantDataArray::getString(CGM.getLLVMContext(), TypeInfoString)}; + llvm::StructType *TypeDescriptorType = + getTypeDescriptorType(TypeInfoString); + return llvm::ConstantExpr::getBitCast( + new llvm::GlobalVariable( + CGM.getModule(), TypeDescriptorType, /*Constant=*/false, + getLinkageForRTTI(Type), + llvm::ConstantStruct::get(TypeDescriptorType, Fields), + MangledName.c_str()), + CGM.Int8PtrTy); +} + +/// \brief Gets or a creates a Microsoft CompleteObjectLocator. +llvm::GlobalVariable * +MicrosoftCXXABI::getMSCompleteObjectLocator(const CXXRecordDecl *RD, + const VPtrInfo *Info) { + return MSRTTIBuilder(*this, RD).getCompleteObjectLocator(Info); +} diff --git a/lib/CodeGen/MicrosoftVBTables.cpp b/lib/CodeGen/MicrosoftVBTables.cpp deleted file mode 100644 index dabf52c1ccc1..000000000000 --- a/lib/CodeGen/MicrosoftVBTables.cpp +++ /dev/null @@ -1,233 +0,0 @@ -//===--- MicrosoftVBTables.cpp - Virtual Base Table Emission --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class generates data about MSVC virtual base tables. -// -//===----------------------------------------------------------------------===// - -#include "MicrosoftVBTables.h" -#include "CodeGenModule.h" -#include "CGCXXABI.h" - -namespace clang { -namespace CodeGen { - -/// Holds intermediate data about a path to a vbptr inside a base subobject. -struct VBTablePath { - VBTablePath(const VBTableInfo &VBInfo) - : VBInfo(VBInfo), NextBase(VBInfo.VBPtrSubobject.getBase()) { } - - /// All the data needed to build a vbtable, minus the GlobalVariable whose - /// name we haven't computed yet. - VBTableInfo VBInfo; - - /// Next base to use for disambiguation. 
Can be null if we've already - /// disambiguated this path once. - const CXXRecordDecl *NextBase; - - /// Path is not really a full path like a CXXBasePath. It holds the subset of - /// records that need to be mangled into the vbtable symbol name in order to get - /// a unique name. - llvm::SmallVector<const CXXRecordDecl *, 1> Path; -}; - -VBTableBuilder::VBTableBuilder(CodeGenModule &CGM, - const CXXRecordDecl *MostDerived) - : CGM(CGM), MostDerived(MostDerived), - DerivedLayout(CGM.getContext().getASTRecordLayout(MostDerived)) {} - -void VBTableBuilder::enumerateVBTables(VBTableVector &VBTables) { - VBTablePathVector Paths; - findUnambiguousPaths(MostDerived, BaseSubobject(MostDerived, - CharUnits::Zero()), Paths); - for (VBTablePathVector::iterator I = Paths.begin(), E = Paths.end(); - I != E; ++I) { - VBTablePath *P = *I; - P->VBInfo.GV = getAddrOfVBTable(P->VBInfo.ReusingBase, P->Path); - VBTables.push_back(P->VBInfo); - } -} - - -void VBTableBuilder::findUnambiguousPaths(const CXXRecordDecl *ReusingBase, - BaseSubobject CurSubobject, - VBTablePathVector &Paths) { - size_t PathsStart = Paths.size(); - bool ReuseVBPtrFromBase = true; - const CXXRecordDecl *CurBase = CurSubobject.getBase(); - const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(CurBase); - - // If this base has a vbptr, then we've found a path. These are not full - // paths, so we don't use CXXBasePath. - if (Layout.hasOwnVBPtr()) { - ReuseVBPtrFromBase = false; - VBTablePath *Info = new VBTablePath( - VBTableInfo(ReusingBase, CurSubobject, /*GV=*/0)); - Paths.push_back(Info); - } - - // Recurse onto any bases which themselves have virtual bases. - for (CXXRecordDecl::base_class_const_iterator I = CurBase->bases_begin(), - E = CurBase->bases_end(); I != E; ++I) { - const CXXRecordDecl *Base = I->getType()->getAsCXXRecordDecl(); - if (!Base->getNumVBases()) - continue; // Bases without virtual bases have no vbptrs. - CharUnits NextOffset; - const CXXRecordDecl *NextReusingBase = Base; - if (I->isVirtual()) { - if (!VBasesSeen.insert(Base)) - continue; // Don't visit virtual bases twice. - NextOffset = DerivedLayout.getVBaseClassOffset(Base); - } else { - NextOffset = (CurSubobject.getBaseOffset() + - Layout.getBaseClassOffset(Base)); - - // If CurBase didn't have a vbptr, then ReusingBase will reuse the vbptr - // from the first non-virtual base with vbases for its vbptr. - if (ReuseVBPtrFromBase) { - NextReusingBase = ReusingBase; - ReuseVBPtrFromBase = false; - } - } - - size_t NumPaths = Paths.size(); - findUnambiguousPaths(NextReusingBase, BaseSubobject(Base, NextOffset), - Paths); - - // Tag paths through this base with the base itself. We might use it to - // disambiguate. - for (size_t I = NumPaths, E = Paths.size(); I != E; ++I) - Paths[I]->NextBase = Base; - } - - bool AmbiguousPaths = rebucketPaths(Paths, PathsStart); - if (AmbiguousPaths) - rebucketPaths(Paths, PathsStart, /*SecondPass=*/true); - -#ifndef NDEBUG - // Check that the paths are in fact unique. - for (size_t I = PathsStart + 1, E = Paths.size(); I != E; ++I) { - assert(Paths[I]->Path != Paths[I - 1]->Path && "vbtable paths are not unique"); - } -#endif -} - -static bool pathCompare(VBTablePath *LHS, VBTablePath *RHS) { - return LHS->Path < RHS->Path; -} - -void VBTableBuilder::extendPath(VBTablePath *P, bool SecondPass) { - assert(P->NextBase || SecondPass); - if (P->NextBase) { - P->Path.push_back(P->NextBase); - P->NextBase = 0; // Prevent the path from being extended twice. 
- } -} - -bool VBTableBuilder::rebucketPaths(VBTablePathVector &Paths, size_t PathsStart, - bool SecondPass) { - // What we're essentially doing here is bucketing together ambiguous paths. - // Any bucket with more than one path in it gets extended by NextBase, which - // is usually the direct base of the inherited the vbptr. This code uses a - // sorted vector to implement a multiset to form the buckets. Note that the - // ordering is based on pointers, but it doesn't change our output order. The - // current algorithm is designed to match MSVC 2012's names. - // TODO: Implement MSVC 2010 or earlier names to avoid extra vbtable cruft. - VBTablePathVector PathsSorted(&Paths[PathsStart], &Paths.back() + 1); - std::sort(PathsSorted.begin(), PathsSorted.end(), pathCompare); - bool AmbiguousPaths = false; - for (size_t I = 0, E = PathsSorted.size(); I != E;) { - // Scan forward to find the end of the bucket. - size_t BucketStart = I; - do { - ++I; - } while (I != E && PathsSorted[BucketStart]->Path == PathsSorted[I]->Path); - - // If this bucket has multiple paths, extend them all. - if (I - BucketStart > 1) { - AmbiguousPaths = true; - for (size_t II = BucketStart; II != I; ++II) - extendPath(PathsSorted[II], SecondPass); - } - } - return AmbiguousPaths; -} - -llvm::GlobalVariable * -VBTableBuilder::getAddrOfVBTable(const CXXRecordDecl *ReusingBase, - ArrayRef<const CXXRecordDecl *> BasePath) { - // Caching at this layer is redundant with the caching in EnumerateVBTables(). - - SmallString<256> OutName; - llvm::raw_svector_ostream Out(OutName); - MicrosoftMangleContext &Mangler = - cast<MicrosoftMangleContext>(CGM.getCXXABI().getMangleContext()); - Mangler.mangleCXXVBTable(MostDerived, BasePath, Out); - Out.flush(); - StringRef Name = OutName.str(); - - llvm::ArrayType *VBTableType = - llvm::ArrayType::get(CGM.IntTy, 1 + ReusingBase->getNumVBases()); - - assert(!CGM.getModule().getNamedGlobal(Name) && - "vbtable with this name already exists: mangling bug?"); - llvm::GlobalVariable *VBTable = - CGM.CreateOrReplaceCXXRuntimeVariable(Name, VBTableType, - llvm::GlobalValue::ExternalLinkage); - VBTable->setUnnamedAddr(true); - return VBTable; -} - -void VBTableInfo::EmitVBTableDefinition( - CodeGenModule &CGM, const CXXRecordDecl *RD, - llvm::GlobalVariable::LinkageTypes Linkage) const { - assert(RD->getNumVBases() && ReusingBase->getNumVBases() && - "should only emit vbtables for classes with vbtables"); - - const ASTRecordLayout &BaseLayout = - CGM.getContext().getASTRecordLayout(VBPtrSubobject.getBase()); - const ASTRecordLayout &DerivedLayout = - CGM.getContext().getASTRecordLayout(RD); - - SmallVector<llvm::Constant *, 4> Offsets(1 + ReusingBase->getNumVBases(), 0); - - // The offset from ReusingBase's vbptr to itself always leads. - CharUnits VBPtrOffset = BaseLayout.getVBPtrOffset(); - Offsets[0] = llvm::ConstantInt::get(CGM.IntTy, -VBPtrOffset.getQuantity()); - - MicrosoftVTableContext &Context = CGM.getMicrosoftVTableContext(); - for (CXXRecordDecl::base_class_const_iterator I = ReusingBase->vbases_begin(), - E = ReusingBase->vbases_end(); I != E; ++I) { - const CXXRecordDecl *VBase = I->getType()->getAsCXXRecordDecl(); - CharUnits Offset = DerivedLayout.getVBaseClassOffset(VBase); - assert(!Offset.isNegative()); - // Make it relative to the subobject vbptr. 
- Offset -= VBPtrSubobject.getBaseOffset() + VBPtrOffset; - unsigned VBIndex = Context.getVBTableIndex(ReusingBase, VBase); - assert(Offsets[VBIndex] == 0 && "The same vbindex seen twice?"); - Offsets[VBIndex] = llvm::ConstantInt::get(CGM.IntTy, Offset.getQuantity()); - } - - assert(Offsets.size() == - cast<llvm::ArrayType>(cast<llvm::PointerType>(GV->getType()) - ->getElementType())->getNumElements()); - llvm::ArrayType *VBTableType = - llvm::ArrayType::get(CGM.IntTy, Offsets.size()); - llvm::Constant *Init = llvm::ConstantArray::get(VBTableType, Offsets); - GV->setInitializer(Init); - - // Set the correct linkage. - GV->setLinkage(Linkage); - - // Set the right visibility. - CGM.setTypeVisibility(GV, RD, CodeGenModule::TVK_ForVTable); -} - -} // namespace CodeGen -} // namespace clang diff --git a/lib/CodeGen/MicrosoftVBTables.h b/lib/CodeGen/MicrosoftVBTables.h deleted file mode 100644 index 4ad8e07582ef..000000000000 --- a/lib/CodeGen/MicrosoftVBTables.h +++ /dev/null @@ -1,129 +0,0 @@ -//===--- MicrosoftVBTables.h - Virtual Base Table Emission ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class generates data about MSVC virtual base tables. -// -//===----------------------------------------------------------------------===// - -#include "clang/AST/BaseSubobject.h" -#include "clang/Basic/LLVM.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/IR/GlobalVariable.h" -#include <vector> - -namespace clang { - -class ASTRecordLayout; - -namespace CodeGen { - -class CodeGenModule; - -struct VBTableInfo { - VBTableInfo(const CXXRecordDecl *ReusingBase, BaseSubobject VBPtrSubobject, - llvm::GlobalVariable *GV) - : ReusingBase(ReusingBase), VBPtrSubobject(VBPtrSubobject), GV(GV) { } - - /// The vbtable will hold all of the virtual bases of ReusingBase. This may - /// or may not be the same class as VBPtrSubobject.Base. A derived class will - /// reuse the vbptr of the first non-virtual base subobject that has one. - const CXXRecordDecl *ReusingBase; - - /// The vbptr is stored inside this subobject. - BaseSubobject VBPtrSubobject; - - /// The GlobalVariable for this vbtable. - llvm::GlobalVariable *GV; - - /// \brief Emits a definition for GV by setting it's initializer. - void EmitVBTableDefinition(CodeGenModule &CGM, const CXXRecordDecl *RD, - llvm::GlobalVariable::LinkageTypes Linkage) const; -}; - -// These are embedded in a DenseMap and the elements are large, so we don't want -// SmallVector. -typedef std::vector<VBTableInfo> VBTableVector; - -struct VBTablePath; - -typedef llvm::SmallVector<VBTablePath *, 6> VBTablePathVector; - -/// Produces MSVC-compatible vbtable data. The symbols produced by this builder -/// match those produced by MSVC 2012, which is different from MSVC 2010. -/// -/// Unlike Itanium, which uses only one vtable per class, MSVC uses a different -/// symbol for every "address point" installed in base subobjects. As a result, -/// we have to compute unique symbols for every table. Since there can be -/// multiple non-virtual base subobjects of the same class, combining the most -/// derived class with the base containing the vtable is insufficient. 
The most -/// trivial algorithm would be to mangle in the entire path from base to most -/// derived, but that would be too easy and would create unnecessarily large -/// symbols. ;) -/// -/// MSVC 2012 appears to minimize the vbtable names using the following -/// algorithm. First, walk the class hierarchy in the usual order, depth first, -/// left to right, to find all of the subobjects which contain a vbptr field. -/// Visiting each class node yields a list of inheritance paths to vbptrs. Each -/// record with a vbptr creates an initially empty path. -/// -/// To combine paths from child nodes, the paths are compared to check for -/// ambiguity. Paths are "ambiguous" if multiple paths have the same set of -/// components in the same order. Each group of ambiguous paths is extended by -/// appending the class of the base from which it came. If the current class -/// node produced an ambiguous path, its path is extended with the current class. -/// After extending paths, MSVC again checks for ambiguity, and extends any -/// ambiguous path which wasn't already extended. Because each node yields an -/// unambiguous set of paths, MSVC doesn't need to extend any path more than once -/// to produce an unambiguous set of paths. -/// -/// The VBTableBuilder class attempts to implement this algorithm by repeatedly -/// bucketing paths together by sorting them. -/// -/// TODO: Presumably vftables use the same algorithm. -/// -/// TODO: Implement the MSVC 2010 name mangling scheme to avoid emitting -/// duplicate vbtables with different symbols. -class VBTableBuilder { -public: - VBTableBuilder(CodeGenModule &CGM, const CXXRecordDecl *MostDerived); - - void enumerateVBTables(VBTableVector &VBTables); - -private: - bool hasVBPtr(const CXXRecordDecl *RD); - - llvm::GlobalVariable *getAddrOfVBTable(const CXXRecordDecl *ReusingBase, - ArrayRef<const CXXRecordDecl *> BasePath); - - /// Enumerates paths to bases with vbptrs. The paths elements are compressed - /// to contain only the classes necessary to form an unambiguous path. - void findUnambiguousPaths(const CXXRecordDecl *ReusingBase, - BaseSubobject CurSubobject, - VBTablePathVector &Paths); - - void extendPath(VBTablePath *Info, bool SecondPass); - - bool rebucketPaths(VBTablePathVector &Paths, size_t PathsStart, - bool SecondPass = false); - - CodeGenModule &CGM; - - const CXXRecordDecl *MostDerived; - - /// Caches the layout of the most derived class. - const ASTRecordLayout &DerivedLayout; - - /// Set of vbases to avoid re-visiting the same vbases. 
- llvm::SmallPtrSet<const CXXRecordDecl*, 4> VBasesSeen; -}; - -} // namespace CodeGen - -} // namespace clang diff --git a/lib/CodeGen/ModuleBuilder.cpp b/lib/CodeGen/ModuleBuilder.cpp index bc7acbc39cab..c5d18d3286a7 100644 --- a/lib/CodeGen/ModuleBuilder.cpp +++ b/lib/CodeGen/ModuleBuilder.cpp @@ -12,30 +12,31 @@ //===----------------------------------------------------------------------===// #include "clang/CodeGen/ModuleBuilder.h" -#include "CodeGenModule.h" #include "CGDebugInfo.h" +#include "CodeGenModule.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/Expr.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/TargetInfo.h" #include "clang/Frontend/CodeGenOptions.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include <memory> using namespace clang; namespace { class CodeGeneratorImpl : public CodeGenerator { DiagnosticsEngine &Diags; - OwningPtr<const llvm::DataLayout> TD; + std::unique_ptr<const llvm::DataLayout> TD; ASTContext *Ctx; const CodeGenOptions CodeGenOpts; // Intentionally copied in. protected: - OwningPtr<llvm::Module> M; - OwningPtr<CodeGen::CodeGenModule> Builder; + std::unique_ptr<llvm::Module> M; + std::unique_ptr<CodeGen::CodeGenModule> Builder; + public: CodeGeneratorImpl(DiagnosticsEngine &diags, const std::string& ModuleName, const CodeGenOptions &CGO, llvm::LLVMContext& C) @@ -44,15 +45,28 @@ namespace { virtual ~CodeGeneratorImpl() {} - virtual llvm::Module* GetModule() { + llvm::Module* GetModule() override { return M.get(); } - virtual llvm::Module* ReleaseModule() { - return M.take(); + const Decl *GetDeclForMangledName(StringRef MangledName) override { + GlobalDecl Result; + if (!Builder->lookupRepresentativeDecl(MangledName, Result)) + return nullptr; + const Decl *D = Result.getCanonicalDecl().getDecl(); + if (auto FD = dyn_cast<FunctionDecl>(D)) { + if (FD->hasBody(FD)) + return FD; + } else if (auto TD = dyn_cast<TagDecl>(D)) { + if (auto Def = TD->getDefinition()) + return Def; + } + return D; } - virtual void Initialize(ASTContext &Context) { + llvm::Module *ReleaseModule() override { return M.release(); } + + void Initialize(ASTContext &Context) override { Ctx = &Context; M->setTargetTriple(Ctx->getTargetInfo().getTriple().getTriple()); @@ -65,48 +79,71 @@ namespace { HandleDependentLibrary(CodeGenOpts.DependentLibraries[i]); } - virtual void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) { + void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override { if (Diags.hasErrorOccurred()) return; Builder->HandleCXXStaticMemberVarInstantiation(VD); } - virtual bool HandleTopLevelDecl(DeclGroupRef DG) { + bool HandleTopLevelDecl(DeclGroupRef DG) override { if (Diags.hasErrorOccurred()) return true; // Make sure to emit all elements of a Decl. for (DeclGroupRef::iterator I = DG.begin(), E = DG.end(); I != E; ++I) Builder->EmitTopLevelDecl(*I); + + // Emit any deferred inline method definitions. + for (CXXMethodDecl *MD : DeferredInlineMethodDefinitions) + Builder->EmitTopLevelDecl(MD); + DeferredInlineMethodDefinitions.clear(); + return true; } + void HandleInlineMethodDefinition(CXXMethodDecl *D) override { + if (Diags.hasErrorOccurred()) + return; + + assert(D->doesThisDeclarationHaveABody()); + + // We may want to emit this definition. 
However, that decision might be + // based on computing the linkage, and we have to defer that in case we + // are inside of something that will change the method's final linkage, + // e.g. + // typedef struct { + // void bar(); + // void foo() { bar(); } + // } A; + DeferredInlineMethodDefinitions.push_back(D); + } + /// HandleTagDeclDefinition - This callback is invoked each time a TagDecl /// to (e.g. struct, union, enum, class) is completed. This allows the /// client hack on the type, which can occur at any point in the file /// (because these can be defined in declspecs). - virtual void HandleTagDeclDefinition(TagDecl *D) { + void HandleTagDeclDefinition(TagDecl *D) override { if (Diags.hasErrorOccurred()) return; Builder->UpdateCompletedType(D); - - // In C++, we may have member functions that need to be emitted at this - // point. - if (Ctx->getLangOpts().CPlusPlus && !D->isDependentContext()) { - for (DeclContext::decl_iterator M = D->decls_begin(), - MEnd = D->decls_end(); - M != MEnd; ++M) - if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(*M)) - if (Method->doesThisDeclarationHaveABody() && - (Method->hasAttr<UsedAttr>() || - Method->hasAttr<ConstructorAttr>())) - Builder->EmitTopLevelDecl(Method); + + // For MSVC compatibility, treat declarations of static data members with + // inline initializers as definitions. + if (Ctx->getLangOpts().MSVCCompat) { + for (Decl *Member : D->decls()) { + if (VarDecl *VD = dyn_cast<VarDecl>(Member)) { + if (Ctx->isMSStaticDataMemberInlineDefinition(VD) && + Ctx->DeclMustBeEmitted(VD)) { + Builder->EmitGlobal(VD); + } + } + } } } - virtual void HandleTagDeclRequiredDefinition(const TagDecl *D) LLVM_OVERRIDE { + void HandleTagDeclRequiredDefinition(const TagDecl *D) override { if (Diags.hasErrorOccurred()) return; @@ -115,8 +152,10 @@ namespace { DI->completeRequiredType(RD); } - virtual void HandleTranslationUnit(ASTContext &Ctx) { + void HandleTranslationUnit(ASTContext &Ctx) override { if (Diags.hasErrorOccurred()) { + if (Builder) + Builder->clear(); M.reset(); return; } @@ -125,32 +164,35 @@ namespace { Builder->Release(); } - virtual void CompleteTentativeDefinition(VarDecl *D) { + void CompleteTentativeDefinition(VarDecl *D) override { if (Diags.hasErrorOccurred()) return; Builder->EmitTentativeDefinition(D); } - virtual void HandleVTable(CXXRecordDecl *RD, bool DefinitionRequired) { + void HandleVTable(CXXRecordDecl *RD, bool DefinitionRequired) override { if (Diags.hasErrorOccurred()) return; Builder->EmitVTable(RD, DefinitionRequired); } - virtual void HandleLinkerOptionPragma(llvm::StringRef Opts) { + void HandleLinkerOptionPragma(llvm::StringRef Opts) override { Builder->AppendLinkerOptions(Opts); } - virtual void HandleDetectMismatch(llvm::StringRef Name, - llvm::StringRef Value) { + void HandleDetectMismatch(llvm::StringRef Name, + llvm::StringRef Value) override { Builder->AddDetectMismatch(Name, Value); } - virtual void HandleDependentLibrary(llvm::StringRef Lib) { + void HandleDependentLibrary(llvm::StringRef Lib) override { Builder->AddDependentLib(Lib); } + + private: + std::vector<CXXMethodDecl *> DeferredInlineMethodDefinitions; }; } diff --git a/lib/CodeGen/SanitizerBlacklist.cpp b/lib/CodeGen/SanitizerBlacklist.cpp new file mode 100644 index 000000000000..9f1ddc8e7d7b --- /dev/null +++ b/lib/CodeGen/SanitizerBlacklist.cpp @@ -0,0 +1,52 @@ +//===--- SanitizerBlacklist.cpp - Blacklist for sanitizers ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of 
Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// User-provided blacklist used to disable/alter instrumentation done in +// sanitizers. +// +//===----------------------------------------------------------------------===// +#include "SanitizerBlacklist.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Module.h" + +using namespace clang; +using namespace CodeGen; + +static StringRef GetGlobalTypeString(const llvm::GlobalValue &G) { + // Types of GlobalVariables are always pointer types. + llvm::Type *GType = G.getType()->getElementType(); + // For now we support blacklisting struct types only. + if (llvm::StructType *SGType = dyn_cast<llvm::StructType>(GType)) { + if (!SGType->isLiteral()) + return SGType->getName(); + } + return "<unknown type>"; +} + +bool SanitizerBlacklist::isIn(const llvm::Module &M, + const StringRef Category) const { + return SCL->inSection("src", M.getModuleIdentifier(), Category); +} + +bool SanitizerBlacklist::isIn(const llvm::Function &F) const { + return isIn(*F.getParent()) || + SCL->inSection("fun", F.getName(), ""); +} + +bool SanitizerBlacklist::isIn(const llvm::GlobalVariable &G, + const StringRef Category) const { + return isIn(*G.getParent(), Category) || + SCL->inSection("global", G.getName(), Category) || + SCL->inSection("type", GetGlobalTypeString(G), Category); +} + +bool SanitizerBlacklist::isBlacklistedType(StringRef MangledTypeName) const { + return SCL->inSection("type", MangledTypeName); +} diff --git a/lib/CodeGen/SanitizerBlacklist.h b/lib/CodeGen/SanitizerBlacklist.h new file mode 100644 index 000000000000..659441dfe346 --- /dev/null +++ b/lib/CodeGen/SanitizerBlacklist.h @@ -0,0 +1,46 @@ +//===--- SanitizerBlacklist.h - Blacklist for sanitizers --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// User-provided blacklist used to disable/alter instrumentation done in +// sanitizers. 
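The sections queried above ("src", "fun", "global", "type") correspond to the entry prefixes of the special case list file a user passes via -fsanitize-blacklist=. A hypothetical blacklist file might look like the following; the entry names are invented, '#' starts a comment, '*' is a wildcard, and an optional "=category" suffix (for example "=init") restricts the entry to one category.

    # suppress instrumentation for everything under a legacy directory
    src:third_party/legacy/*
    # do not instrument a hand-tuned function
    fun:crc32_unchecked
    # only disable init-order checking for this global
    global:gBigLookupTable=init
    # skip a type by name
    type:PackedNetworkHeader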
+// +//===----------------------------------------------------------------------===// +#ifndef CLANG_CODEGEN_SANITIZERBLACKLIST_H +#define CLANG_CODEGEN_SANITIZERBLACKLIST_H + +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/SpecialCaseList.h" +#include <memory> + +namespace llvm { +class GlobalVariable; +class Function; +class Module; +} + +namespace clang { +namespace CodeGen { + +class SanitizerBlacklist { + std::unique_ptr<llvm::SpecialCaseList> SCL; + +public: + SanitizerBlacklist(llvm::SpecialCaseList *SCL) : SCL(SCL) {} + bool isIn(const llvm::Module &M, + const StringRef Category = StringRef()) const; + bool isIn(const llvm::Function &F) const; + bool isIn(const llvm::GlobalVariable &G, + const StringRef Category = StringRef()) const; + bool isBlacklistedType(StringRef MangledTypeName) const; +}; +} // end namespace CodeGen +} // end namespace clang + +#endif diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index 76acf871da26..5da22c3e6cbb 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -23,6 +23,9 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Type.h" #include "llvm/Support/raw_ostream.h" + +#include <algorithm> // std::sort + using namespace clang; using namespace CodeGen; @@ -45,22 +48,6 @@ static bool isAggregateTypeForABI(QualType T) { ABIInfo::~ABIInfo() {} -static bool isRecordReturnIndirect(const RecordType *RT, - CGCXXABI &CXXABI) { - const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl()); - if (!RD) - return false; - return CXXABI.isReturnTypeIndirect(RD); -} - - -static bool isRecordReturnIndirect(QualType T, CGCXXABI &CXXABI) { - const RecordType *RT = T->getAs<RecordType>(); - if (!RT) - return false; - return isRecordReturnIndirect(RT, CXXABI); -} - static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT, CGCXXABI &CXXABI) { const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl()); @@ -114,6 +101,9 @@ void ABIArgInfo::dump() const { case Ignore: OS << "Ignore"; break; + case InAlloca: + OS << "InAlloca Offset=" << getInAllocaFieldIndex(); + break; case Indirect: OS << "Indirect Align=" << getIndirectAlign() << " ByVal=" << getIndirectByVal() @@ -206,14 +196,12 @@ static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) { // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) - for (CXXRecordDecl::base_class_const_iterator i = CXXRD->bases_begin(), - e = CXXRD->bases_end(); i != e; ++i) - if (!isEmptyRecord(Context, i->getType(), true)) + for (const auto &I : CXXRD->bases()) + if (!isEmptyRecord(Context, I.getType(), true)) return false; - for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); - i != e; ++i) - if (!isEmptyField(Context, *i, AllowArrays)) + for (const auto *I : RD->fields()) + if (!isEmptyField(Context, I, AllowArrays)) return false; return true; } @@ -229,38 +217,35 @@ static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) { static const Type *isSingleElementStruct(QualType T, ASTContext &Context) { const RecordType *RT = T->getAsStructureType(); if (!RT) - return 0; + return nullptr; const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) - return 0; + return nullptr; - const Type *Found = 0; + const Type *Found = nullptr; // If this is a C++ record, check the bases first. 
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { - for (CXXRecordDecl::base_class_const_iterator i = CXXRD->bases_begin(), - e = CXXRD->bases_end(); i != e; ++i) { + for (const auto &I : CXXRD->bases()) { // Ignore empty records. - if (isEmptyRecord(Context, i->getType(), true)) + if (isEmptyRecord(Context, I.getType(), true)) continue; // If we already found an element then this isn't a single-element struct. if (Found) - return 0; + return nullptr; // If this is non-empty and not a single element struct, the composite // cannot be a single element struct. - Found = isSingleElementStruct(i->getType(), Context); + Found = isSingleElementStruct(I.getType(), Context); if (!Found) - return 0; + return nullptr; } } // Check for single element. - for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); - i != e; ++i) { - const FieldDecl *FD = *i; + for (const auto *FD : RD->fields()) { QualType FT = FD->getType(); // Ignore empty fields. @@ -270,7 +255,7 @@ static const Type *isSingleElementStruct(QualType T, ASTContext &Context) { // If we already found an element then this isn't a single-element // struct. if (Found) - return 0; + return nullptr; // Treat single element arrays as the element. while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) { @@ -284,14 +269,14 @@ static const Type *isSingleElementStruct(QualType T, ASTContext &Context) { } else { Found = isSingleElementStruct(FT, Context); if (!Found) - return 0; + return nullptr; } } // We don't consider a struct a single-element struct if it has // padding beyond the element type. if (Found && Context.getTypeSize(Found) != Context.getTypeSize(T)) - return 0; + return nullptr; return Found; } @@ -336,10 +321,7 @@ static bool canExpandIndirectArgument(QualType Ty, ASTContext &Context) { uint64_t Size = 0; - for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); - i != e; ++i) { - const FieldDecl *FD = *i; - + for (const auto *FD : RD->fields()) { if (!is32Or64BitBasicType(FD->getType(), Context)) return false; @@ -371,15 +353,15 @@ public: ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy) const; - virtual void computeInfo(CGFunctionInfo &FI) const { - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); - it != ie; ++it) - it->info = classifyArgumentType(it->type); + void computeInfo(CGFunctionInfo &FI) const override { + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type); } - virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CGF) const; + llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const override; }; class DefaultTargetCodeGenInfo : public TargetCodeGenInfo { @@ -390,18 +372,12 @@ public: llvm::Value *DefaultABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, CodeGenFunction &CGF) const { - return 0; + return nullptr; } ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const { - if (isAggregateTypeForABI(Ty)) { - // Records with non trivial destructors/constructors should not be passed - // by value. 
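A few source-level examples of what isSingleElementStruct above accepts and rejects (illustrative only, assuming ordinary layout with no alignment attributes except where noted):

    struct Empty {};
    struct OneDouble { double d; };             // single-element: the double
    struct Wrapped   { Empty e; float f[1]; };  // single-element: empty field ignored,
                                                // one-element array unwrapped to float
    struct TwoFields { double d; int i; };      // not single-element: two elements
    struct alignas(8) Padded { int i; };        // rejected by the final size check:
                                                // the struct is wider than its element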
- if (isRecordReturnIndirect(Ty, getCXXABI())) - return ABIArgInfo::getIndirect(0, /*ByVal=*/false); - + if (isAggregateTypeForABI(Ty)) return ABIArgInfo::getIndirect(0); - } // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs<EnumType>()) @@ -440,9 +416,9 @@ class PNaClABIInfo : public ABIInfo { ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy) const; - virtual void computeInfo(CGFunctionInfo &FI) const; - virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CGF) const; + void computeInfo(CGFunctionInfo &FI) const override; + llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const override; }; class PNaClTargetCodeGenInfo : public TargetCodeGenInfo { @@ -452,16 +428,16 @@ class PNaClTargetCodeGenInfo : public TargetCodeGenInfo { }; void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const { + if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); - it != ie; ++it) - it->info = classifyArgumentType(it->type); - } + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type); +} llvm::Value *PNaClABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, CodeGenFunction &CGF) const { - return 0; + return nullptr; } /// \brief Classify argument of given type \p Ty. @@ -512,7 +488,7 @@ static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, if ((Constraint == "y" || Constraint == "&y") && Ty->isVectorTy()) { if (cast<llvm::VectorType>(Ty)->getBitWidth() != 64) { // Invalid MMX constraint - return 0; + return nullptr; } return llvm::Type::getX86_MMXTy(CGF.getLLVMContext()); @@ -526,6 +502,16 @@ static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, // X86-32 ABI Implementation //===----------------------------------------------------------------------===// +/// \brief Similar to llvm::CCState, but for Clang. +struct CCState { + CCState(unsigned CC) : CC(CC), FreeRegs(0) {} + + unsigned CC; + unsigned FreeRegs; + unsigned StackOffset; + bool UseInAlloca; +}; + /// X86_32ABIInfo - The X86-32 ABI information. class X86_32ABIInfo : public ABIInfo { enum Class { @@ -544,30 +530,35 @@ class X86_32ABIInfo : public ABIInfo { return (Size == 8 || Size == 16 || Size == 32 || Size == 64); } - static bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context, - unsigned callingConvention); + bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const; /// getIndirectResult - Give a source type \arg Ty, return a suitable result /// such that the argument will be passed in memory. - ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, - unsigned &FreeRegs) const; + ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const; + + ABIArgInfo getIndirectReturnResult(CCState &State) const; /// \brief Return the alignment to use for the given type on the stack. 
unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const; Class classify(QualType Ty) const; - ABIArgInfo classifyReturnType(QualType RetTy, - unsigned callingConvention) const; - ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &FreeRegs, - bool IsFastCall) const; - bool shouldUseInReg(QualType Ty, unsigned &FreeRegs, - bool IsFastCall, bool &NeedsPadding) const; + ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const; + ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; + bool shouldUseInReg(QualType Ty, CCState &State, bool &NeedsPadding) const; + + /// \brief Rewrite the function info so that all memory arguments use + /// inalloca. + void rewriteWithInAlloca(CGFunctionInfo &FI) const; + + void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields, + unsigned &StackOffset, ABIArgInfo &Info, + QualType Type) const; public: - virtual void computeInfo(CGFunctionInfo &FI) const; - virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CGF) const; + void computeInfo(CGFunctionInfo &FI) const override; + llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const override; X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool d, bool p, bool w, unsigned r) @@ -585,24 +576,25 @@ public: const llvm::Triple &Triple, const CodeGenOptions &Opts); void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const; + CodeGen::CodeGenModule &CGM) const override; - int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const { + int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { // Darwin uses different dwarf register numbers for EH. if (CGM.getTarget().getTriple().isOSDarwin()) return 5; return 4; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const; + llvm::Value *Address) const override; llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF, StringRef Constraint, - llvm::Type* Ty) const { + llvm::Type* Ty) const override { return X86AdjustInlineAsmType(CGF, Constraint, Ty); } - llvm::Constant *getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const { + llvm::Constant * + getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override { unsigned Sig = (0xeb << 0) | // jmp rel8 (0x06 << 8) | // .+0x08 ('F' << 16) | @@ -617,8 +609,7 @@ public: /// shouldReturnTypeInRegister - Determine if the given type should be /// passed in a register (for the Darwin ABI). bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty, - ASTContext &Context, - unsigned callingConvention) { + ASTContext &Context) const { uint64_t Size = Context.getTypeSize(Ty); // Type must be register sized. @@ -643,8 +634,7 @@ bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty, // Arrays are treated like records. if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) - return shouldReturnTypeInRegister(AT->getElementType(), Context, - callingConvention); + return shouldReturnTypeInRegister(AT->getElementType(), Context); // Otherwise, it must be a record type. const RecordType *RT = Ty->getAs<RecordType>(); @@ -652,33 +642,31 @@ bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty, // FIXME: Traverse bases here too. - // For thiscall conventions, structures will never be returned in - // a register. 
This is for compatibility with the MSVC ABI - if (callingConvention == llvm::CallingConv::X86_ThisCall && - RT->isStructureType()) { - return false; - } - // Structure types are passed in register if all fields would be // passed in a register. - for (RecordDecl::field_iterator i = RT->getDecl()->field_begin(), - e = RT->getDecl()->field_end(); i != e; ++i) { - const FieldDecl *FD = *i; - + for (const auto *FD : RT->getDecl()->fields()) { // Empty fields are ignored. if (isEmptyField(Context, FD, true)) continue; // Check fields recursively. - if (!shouldReturnTypeInRegister(FD->getType(), Context, - callingConvention)) + if (!shouldReturnTypeInRegister(FD->getType(), Context)) return false; } return true; } -ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, - unsigned callingConvention) const { +ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(CCState &State) const { + // If the return value is indirect, then the hidden argument is consuming one + // integer register. + if (State.FreeRegs) { + --State.FreeRegs; + return ABIArgInfo::getIndirectInReg(/*Align=*/0, /*ByVal=*/false); + } + return ABIArgInfo::getIndirect(/*Align=*/0, /*ByVal=*/false); +} + +ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, CCState &State) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); @@ -701,7 +689,7 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); - return ABIArgInfo::getIndirect(0); + return getIndirectReturnResult(State); } return ABIArgInfo::getDirect(); @@ -709,22 +697,18 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, if (isAggregateTypeForABI(RetTy)) { if (const RecordType *RT = RetTy->getAs<RecordType>()) { - if (isRecordReturnIndirect(RT, getCXXABI())) - return ABIArgInfo::getIndirect(0, /*ByVal=*/false); - // Structures with flexible arrays are always indirect. if (RT->getDecl()->hasFlexibleArrayMember()) - return ABIArgInfo::getIndirect(0); + return getIndirectReturnResult(State); } // If specified, structs and unions are always indirect. if (!IsSmallStructInRegABI && !RetTy->isAnyComplexType()) - return ABIArgInfo::getIndirect(0); + return getIndirectReturnResult(State); // Small structures which are register sized are generally returned // in a register. - if (X86_32ABIInfo::shouldReturnTypeInRegister(RetTy, getContext(), - callingConvention)) { + if (shouldReturnTypeInRegister(RetTy, getContext())) { uint64_t Size = getContext().getTypeSize(RetTy); // As a special-case, if the struct is a "single-element" struct, and @@ -742,7 +726,7 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),Size)); } - return ABIArgInfo::getIndirect(0); + return getIndirectReturnResult(State); } // Treat an enum type as its underlying type. @@ -765,13 +749,11 @@ static bool isRecordWithSSEVectorType(ASTContext &Context, QualType Ty) { // If this is a C++ record, check the bases first. 
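For the i386 return classification above, a few illustrative cases (assuming a target where small structs are returned in registers, per isStructReturnInRegABI later in this file):

    struct P4  { short a, b; };          // 32 bits, register sized -> returned directly
    struct P8  { int a, b; };            // 64 bits -> returned as an i64 register pair
    struct F1  { float f; };             // single-element float -> returned directly
    struct Big { int a, b, c; };         // 96 bits -> returned indirectly (sret)
    struct Flex { int n; int tail[]; };  // flexible array member -> always indirect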
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) - for (CXXRecordDecl::base_class_const_iterator i = CXXRD->bases_begin(), - e = CXXRD->bases_end(); i != e; ++i) - if (!isRecordWithSSEVectorType(Context, i->getType())) + for (const auto &I : CXXRD->bases()) + if (!isRecordWithSSEVectorType(Context, I.getType())) return false; - for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); - i != e; ++i) { + for (const auto *i : RD->fields()) { QualType FT = i->getType(); if (isSSEVectorType(Context, FT)) @@ -806,10 +788,10 @@ unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty, } ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal, - unsigned &FreeRegs) const { + CCState &State) const { if (!ByVal) { - if (FreeRegs) { - --FreeRegs; // Non byval indirects just use one pointer. + if (State.FreeRegs) { + --State.FreeRegs; // Non-byval indirects just use one pointer. return ABIArgInfo::getIndirectInReg(0, false); } return ABIArgInfo::getIndirect(0, false); @@ -819,15 +801,12 @@ ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal, unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign); if (StackAlign == 0) - return ABIArgInfo::getIndirect(4); + return ABIArgInfo::getIndirect(4, /*ByVal=*/true); // If the stack alignment is less than the type alignment, realign the // argument. - if (StackAlign < TypeAlign) - return ABIArgInfo::getIndirect(StackAlign, /*ByVal=*/true, - /*Realign=*/true); - - return ABIArgInfo::getIndirect(StackAlign); + bool Realign = TypeAlign > StackAlign; + return ABIArgInfo::getIndirect(StackAlign, /*ByVal=*/true, Realign); } X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const { @@ -843,8 +822,8 @@ X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const { return Integer; } -bool X86_32ABIInfo::shouldUseInReg(QualType Ty, unsigned &FreeRegs, - bool IsFastCall, bool &NeedsPadding) const { +bool X86_32ABIInfo::shouldUseInReg(QualType Ty, CCState &State, + bool &NeedsPadding) const { NeedsPadding = false; Class C = classify(Ty); if (C == Float) @@ -856,14 +835,14 @@ bool X86_32ABIInfo::shouldUseInReg(QualType Ty, unsigned &FreeRegs, if (SizeInRegs == 0) return false; - if (SizeInRegs > FreeRegs) { - FreeRegs = 0; + if (SizeInRegs > State.FreeRegs) { + State.FreeRegs = 0; return false; } - FreeRegs -= SizeInRegs; + State.FreeRegs -= SizeInRegs; - if (IsFastCall) { + if (State.CC == llvm::CallingConv::X86_FastCall) { if (Size > 32) return false; @@ -876,7 +855,7 @@ bool X86_32ABIInfo::shouldUseInReg(QualType Ty, unsigned &FreeRegs, if (Ty->isReferenceType()) return true; - if (FreeRegs) + if (State.FreeRegs) NeedsPadding = true; return false; @@ -886,20 +865,26 @@ bool X86_32ABIInfo::shouldUseInReg(QualType Ty, unsigned &FreeRegs, } ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, - unsigned &FreeRegs, - bool IsFastCall) const { + CCState &State) const { // FIXME: Set alignment on indirect arguments. if (isAggregateTypeForABI(Ty)) { if (const RecordType *RT = Ty->getAs<RecordType>()) { - if (IsWin32StructABI) - return getIndirectResult(Ty, true, FreeRegs); + // Check with the C++ ABI first. + CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); + if (RAA == CGCXXABI::RAA_Indirect) { + return getIndirectResult(Ty, false, State); + } else if (RAA == CGCXXABI::RAA_DirectInMemory) { + // The field index doesn't matter, we'll fix it up later. 
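One concrete way to reach the RAA_DirectInMemory / inalloca path above on i386 Windows is a by-value parameter whose type has a non-trivial copy constructor or destructor: such an object must be constructed directly in the outgoing argument memory rather than copied there. Illustrative only:

    struct RefCounted {
      RefCounted(const RefCounted &Other);   // non-trivial copy constructor
      ~RefCounted();                         // non-trivial destructor
      int *Payload;
    };
    // Under the Microsoft C++ ABI on x86-32, 'rc' is classified
    // RAA_DirectInMemory and ends up in the inalloca argument block.
    void consume(RefCounted rc);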
+ return ABIArgInfo::getInAlloca(/*FieldIndex=*/0); + } - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI())) - return getIndirectResult(Ty, RAA == CGCXXABI::RAA_DirectInMemory, FreeRegs); + // Structs are always byval on win32, regardless of what they contain. + if (IsWin32StructABI) + return getIndirectResult(Ty, true, State); // Structures with flexible arrays are always indirect. if (RT->getDecl()->hasFlexibleArrayMember()) - return getIndirectResult(Ty, true, FreeRegs); + return getIndirectResult(Ty, true, State); } // Ignore empty structs/unions. @@ -909,13 +894,13 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, llvm::LLVMContext &LLVMContext = getVMContext(); llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); bool NeedsPadding; - if (shouldUseInReg(Ty, FreeRegs, IsFastCall, NeedsPadding)) { + if (shouldUseInReg(Ty, State, NeedsPadding)) { unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32); llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); return ABIArgInfo::getDirectInReg(Result); } - llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : 0; + llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr; // Expand small (<= 128-bit) record types when we know that the stack layout // of those arguments will match the struct. This is important because the @@ -923,9 +908,10 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, // optimizations. if (getContext().getTypeSize(Ty) <= 4*32 && canExpandIndirectArgument(Ty, getContext())) - return ABIArgInfo::getExpandWithPadding(IsFastCall, PaddingType); + return ABIArgInfo::getExpandWithPadding( + State.CC == llvm::CallingConv::X86_FastCall, PaddingType); - return getIndirectResult(Ty, true, FreeRegs); + return getIndirectResult(Ty, true, State); } if (const VectorType *VT = Ty->getAs<VectorType>()) { @@ -950,7 +936,7 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, Ty = EnumTy->getDecl()->getIntegerType(); bool NeedsPadding; - bool InReg = shouldUseInReg(Ty, FreeRegs, IsFastCall, NeedsPadding); + bool InReg = shouldUseInReg(Ty, State, NeedsPadding); if (Ty->isPromotableIntegerType()) { if (InReg) @@ -963,32 +949,105 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, } void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { - FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), - FI.getCallingConvention()); - - unsigned CC = FI.getCallingConvention(); - bool IsFastCall = CC == llvm::CallingConv::X86_FastCall; - unsigned FreeRegs; - if (IsFastCall) - FreeRegs = 2; + CCState State(FI.getCallingConvention()); + if (State.CC == llvm::CallingConv::X86_FastCall) + State.FreeRegs = 2; else if (FI.getHasRegParm()) - FreeRegs = FI.getRegParm(); + State.FreeRegs = FI.getRegParm(); else - FreeRegs = DefaultNumRegisterParameters; + State.FreeRegs = DefaultNumRegisterParameters; + + if (!getCXXABI().classifyReturnType(FI)) { + FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State); + } else if (FI.getReturnInfo().isIndirect()) { + // The C++ ABI is not aware of register usage, so we have to check if the + // return value was sret and put it in a register ourselves if appropriate. + if (State.FreeRegs) { + --State.FreeRegs; // The sret parameter consumes a register. + FI.getReturnInfo().setInReg(true); + } + } - // If the return value is indirect, then the hidden argument is consuming one - // integer register. 
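The CCState bookkeeping above starts with two free integer registers for fastcall. As a quick illustration of standard fastcall behavior (not specific to this patch):

    // x86-32 fastcall: ecx and edx are available for integer arguments.
    void __attribute__((fastcall)) take3(int a, int b, int c);
    // 'a' -> ecx, 'b' -> edx, 'c' -> pushed on the stack once FreeRegs reaches 0.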
- if (FI.getReturnInfo().isIndirect() && FreeRegs) { - --FreeRegs; - ABIArgInfo &Old = FI.getReturnInfo(); - Old = ABIArgInfo::getIndirectInReg(Old.getIndirectAlign(), - Old.getIndirectByVal(), - Old.getIndirectRealign()); + bool UsedInAlloca = false; + for (auto &I : FI.arguments()) { + I.info = classifyArgumentType(I.type, State); + UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); } - for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); - it != ie; ++it) - it->info = classifyArgumentType(it->type, FreeRegs, IsFastCall); + // If we needed to use inalloca for any argument, do a second pass and rewrite + // all the memory arguments to use inalloca. + if (UsedInAlloca) + rewriteWithInAlloca(FI); +} + +void +X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields, + unsigned &StackOffset, + ABIArgInfo &Info, QualType Type) const { + assert(StackOffset % 4U == 0 && "unaligned inalloca struct"); + Info = ABIArgInfo::getInAlloca(FrameFields.size()); + FrameFields.push_back(CGT.ConvertTypeForMem(Type)); + StackOffset += getContext().getTypeSizeInChars(Type).getQuantity(); + + // Insert padding bytes to respect alignment. For x86_32, each argument is 4 + // byte aligned. + if (StackOffset % 4U) { + unsigned OldOffset = StackOffset; + StackOffset = llvm::RoundUpToAlignment(StackOffset, 4U); + unsigned NumBytes = StackOffset - OldOffset; + assert(NumBytes); + llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext()); + Ty = llvm::ArrayType::get(Ty, NumBytes); + FrameFields.push_back(Ty); + } +} + +void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const { + assert(IsWin32StructABI && "inalloca only supported on win32"); + + // Build a packed struct type for all of the arguments in memory. + SmallVector<llvm::Type *, 6> FrameFields; + + unsigned StackOffset = 0; + + // Put the sret parameter into the inalloca struct if it's in memory. + ABIArgInfo &Ret = FI.getReturnInfo(); + if (Ret.isIndirect() && !Ret.getInReg()) { + CanQualType PtrTy = getContext().getPointerType(FI.getReturnType()); + addFieldToArgStruct(FrameFields, StackOffset, Ret, PtrTy); + // On Windows, the hidden sret parameter is always returned in eax. + Ret.setInAllocaSRet(IsWin32StructABI); + } + + // Skip the 'this' parameter in ecx. + CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end(); + if (FI.getCallingConvention() == llvm::CallingConv::X86_ThisCall) + ++I; + + // Put arguments passed in memory into the struct. + for (; I != E; ++I) { + + // Leave ignored and inreg arguments alone. 
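To make the rewriteWithInAlloca / addFieldToArgStruct flow above concrete, here is a hypothetical call and the packed frame layout it would plausibly produce; the IR type shown is a sketch under the stated assumptions, not verified compiler output:

    struct NonTrivial { NonTrivial(const NonTrivial &); ~NonTrivial(); int x; };
    void callee(NonTrivial nt, char c, double d);
    // On i386 MSVC, 'nt' forces inalloca, so every memory argument moves into
    // one packed struct; a 3-byte i8 array keeps the next field 4-byte aligned:
    //   <{ %struct.NonTrivial, i8, [3 x i8], double }>
    //       nt @ 0            c @ 4  pad    d @ 8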
+ switch (I->info.getKind()) { + case ABIArgInfo::Indirect: + assert(I->info.getIndirectByVal()); + break; + case ABIArgInfo::Ignore: + continue; + case ABIArgInfo::Direct: + case ABIArgInfo::Extend: + if (I->info.getInReg()) + continue; + break; + default: + break; + } + + addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type); + } + + FI.setArgStruct(llvm::StructType::get(getVMContext(), FrameFields, + /*isPacked=*/true)); } llvm::Value *X86_32ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, @@ -1031,6 +1090,44 @@ llvm::Value *X86_32ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, return AddrTyped; } +bool X86_32TargetCodeGenInfo::isStructReturnInRegABI( + const llvm::Triple &Triple, const CodeGenOptions &Opts) { + assert(Triple.getArch() == llvm::Triple::x86); + + switch (Opts.getStructReturnConvention()) { + case CodeGenOptions::SRCK_Default: + break; + case CodeGenOptions::SRCK_OnStack: // -fpcc-struct-return + return false; + case CodeGenOptions::SRCK_InRegs: // -freg-struct-return + return true; + } + + if (Triple.isOSDarwin()) + return true; + + switch (Triple.getOS()) { + case llvm::Triple::AuroraUX: + case llvm::Triple::DragonFly: + case llvm::Triple::FreeBSD: + case llvm::Triple::OpenBSD: + case llvm::Triple::Bitrig: + return true; + case llvm::Triple::Win32: + switch (Triple.getEnvironment()) { + case llvm::Triple::UnknownEnvironment: + case llvm::Triple::Cygnus: + case llvm::Triple::GNU: + case llvm::Triple::MSVC: + return true; + default: + return false; + } + default: + return false; + } +} + void X86_32TargetCodeGenInfo::SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { @@ -1219,10 +1316,10 @@ public: return false; } - virtual void computeInfo(CGFunctionInfo &FI) const; + void computeInfo(CGFunctionInfo &FI) const override; - virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CGF) const; + llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const override; }; /// WinX86_64ABIInfo - The Windows X86_64 ABI information. @@ -1233,10 +1330,10 @@ class WinX86_64ABIInfo : public ABIInfo { public: WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {} - virtual void computeInfo(CGFunctionInfo &FI) const; + void computeInfo(CGFunctionInfo &FI) const override; - virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CGF) const; + llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const override; }; class X86_64TargetCodeGenInfo : public TargetCodeGenInfo { @@ -1248,12 +1345,12 @@ public: return static_cast<const X86_64ABIInfo&>(TargetCodeGenInfo::getABIInfo()); } - int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const { + int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { return 7; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const { + llvm::Value *Address) const override { llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8); // 0-15 are the 16 integer registers. 
@@ -1264,12 +1361,12 @@ public: llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF, StringRef Constraint, - llvm::Type* Ty) const { + llvm::Type* Ty) const override { return X86AdjustInlineAsmType(CGF, Constraint, Ty); } bool isNoProtoCallVariadic(const CallArgList &args, - const FunctionNoProtoType *fnType) const { + const FunctionNoProtoType *fnType) const override { // The default CC on x86-64 sets %al to the number of SSA // registers used, and GCC sets this when calling an unprototyped // function, so we override the default behavior. However, don't do @@ -1293,7 +1390,8 @@ public: return TargetCodeGenInfo::isNoProtoCallVariadic(args, fnType); } - llvm::Constant *getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const { + llvm::Constant * + getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override { unsigned Sig = (0xeb << 0) | // jmp rel8 (0x0a << 8) | // .+0x0c ('F' << 16) | @@ -1319,14 +1417,14 @@ public: : X86_32TargetCodeGenInfo(CGT, d, p, w, RegParms) {} void getDependentLibraryOption(llvm::StringRef Lib, - llvm::SmallString<24> &Opt) const { + llvm::SmallString<24> &Opt) const override { Opt = "/DEFAULTLIB:"; Opt += qualifyWindowsLibrary(Lib); } void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, - llvm::SmallString<32> &Opt) const { + llvm::SmallString<32> &Opt) const override { Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; } }; @@ -1336,12 +1434,12 @@ public: WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)) {} - int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const { + int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { return 7; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const { + llvm::Value *Address) const override { llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8); // 0-15 are the 16 integer registers. @@ -1351,14 +1449,14 @@ public: } void getDependentLibraryOption(llvm::StringRef Lib, - llvm::SmallString<24> &Opt) const { + llvm::SmallString<24> &Opt) const override { Opt = "/DEFAULTLIB:"; Opt += qualifyWindowsLibrary(Lib); } void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, - llvm::SmallString<32> &Opt) const { + llvm::SmallString<32> &Opt) const override { Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; } }; @@ -1642,12 +1740,11 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, // If this is a C++ record, classify the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { - for (CXXRecordDecl::base_class_const_iterator i = CXXRD->bases_begin(), - e = CXXRD->bases_end(); i != e; ++i) { - assert(!i->isVirtual() && !i->getType()->isDependentType() && + for (const auto &I : CXXRD->bases()) { + assert(!I.isVirtual() && !I.getType()->isDependentType() && "Unexpected base class!"); const CXXRecordDecl *Base = - cast<CXXRecordDecl>(i->getType()->getAs<RecordType>()->getDecl()); + cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl()); // Classify this field. 
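The Windows-specific getDependentLibraryOption / getDetectMismatchOption hooks in the hunk above translate source pragmas into linker directives. Roughly (illustrative; the exact library-name normalization and quoting come from qualifyWindowsLibrary and the code above):

    #pragma comment(lib, "ws2_32")            // -> roughly /DEFAULTLIB:ws2_32.lib
    #pragma detect_mismatch("myLib_ver", "9") // -> /FAILIFMISMATCH:"myLib_ver=9"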
// @@ -1657,7 +1754,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class FieldLo, FieldHi; uint64_t Offset = OffsetBase + getContext().toBits(Layout.getBaseClassOffset(Base)); - classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg); + classify(I.getType(), Offset, FieldLo, FieldHi, isNamedArg); Lo = merge(Lo, FieldLo); Hi = merge(Hi, FieldHi); if (Lo == Memory || Hi == Memory) @@ -1887,19 +1984,18 @@ static bool BitsContainNoUserData(QualType Ty, unsigned StartBit, // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { - for (CXXRecordDecl::base_class_const_iterator i = CXXRD->bases_begin(), - e = CXXRD->bases_end(); i != e; ++i) { - assert(!i->isVirtual() && !i->getType()->isDependentType() && + for (const auto &I : CXXRD->bases()) { + assert(!I.isVirtual() && !I.getType()->isDependentType() && "Unexpected base class!"); const CXXRecordDecl *Base = - cast<CXXRecordDecl>(i->getType()->getAs<RecordType>()->getDecl()); + cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl()); // If the base is after the span we care about, ignore it. unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base)); if (BaseOffset >= EndBit) continue; unsigned BaseStart = BaseOffset < StartBit ? StartBit-BaseOffset :0; - if (!BitsContainNoUserData(i->getType(), BaseStart, + if (!BitsContainNoUserData(I.getType(), BaseStart, EndBit-BaseOffset, Context)) return false; } @@ -1995,7 +2091,7 @@ GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, /// the source type. IROffset is an offset in bytes into the LLVM IR type that /// the 8-byte value references. PrefType may be null. /// -/// SourceTy is the source level type for the entire argument. SourceOffset is +/// SourceTy is the source-level type for the entire argument. SourceOffset is /// an offset into this that we're processing (which is always either 0 or 8). /// llvm::Type *X86_64ABIInfo:: @@ -2114,7 +2210,7 @@ classifyReturnType(QualType RetTy) const { assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); - llvm::Type *ResType = 0; + llvm::Type *ResType = nullptr; switch (Lo) { case NoClass: if (Hi == NoClass) @@ -2175,7 +2271,7 @@ classifyReturnType(QualType RetTy) const { break; } - llvm::Type *HighPart = 0; + llvm::Type *HighPart = nullptr; switch (Hi) { // Memory was handled previously and X87 should // never occur as a hi class. @@ -2247,7 +2343,7 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType( neededInt = 0; neededSSE = 0; - llvm::Type *ResType = 0; + llvm::Type *ResType = nullptr; switch (Lo) { case NoClass: if (Hi == NoClass) @@ -2308,7 +2404,7 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType( } } - llvm::Type *HighPart = 0; + llvm::Type *HighPart = nullptr; switch (Hi) { // Memory was handled previously, ComplexX87 and X87 should // never occur as hi classes, and X87Up must be preceded by X87, @@ -2361,7 +2457,8 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType( void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); // Keep track of the number of assigned registers. 
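For reference while reading the classification changes above, the usual SysV x86-64 eightbyte results for a few simple aggregates (standard ABI behavior, assuming enough free registers):

    struct IntPair { int a, b; };         // one INTEGER eightbyte -> a single GPR
    struct Mixed   { double d; long l; }; // SSE + INTEGER -> one XMM and one GPR
    struct Big     { long a, b, c; };     // 24 bytes -> class MEMORY, passed on the stack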
unsigned freeIntRegs = 6, freeSSERegs = 8; @@ -2482,9 +2579,9 @@ llvm::Value *X86_64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, // NOTE: 304 is a typo, there are (6 * 8 + 8 * 16) = 176 bytes of // register save space). - llvm::Value *InRegs = 0; - llvm::Value *gp_offset_p = 0, *gp_offset = 0; - llvm::Value *fp_offset_p = 0, *fp_offset = 0; + llvm::Value *InRegs = nullptr; + llvm::Value *gp_offset_p = nullptr, *gp_offset = nullptr; + llvm::Value *fp_offset_p = nullptr, *fp_offset = nullptr; if (neededInt) { gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "gp_offset_p"); gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset"); @@ -2539,8 +2636,8 @@ llvm::Value *X86_64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, llvm::Type *PTyHi = llvm::PointerType::getUnqual(TyHi); llvm::Value *GPAddr = CGF.Builder.CreateGEP(RegAddr, gp_offset); llvm::Value *FPAddr = CGF.Builder.CreateGEP(RegAddr, fp_offset); - llvm::Value *RegLoAddr = TyLo->isFloatingPointTy() ? FPAddr : GPAddr; - llvm::Value *RegHiAddr = TyLo->isFloatingPointTy() ? GPAddr : FPAddr; + llvm::Value *RegLoAddr = TyLo->isFPOrFPVectorTy() ? FPAddr : GPAddr; + llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr; llvm::Value *V = CGF.Builder.CreateLoad(CGF.Builder.CreateBitCast(RegLoAddr, PTyLo)); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0)); @@ -2630,11 +2727,9 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, bool IsReturnType) const { uint64_t Size = getContext().getTypeSize(Ty); - if (const RecordType *RT = Ty->getAs<RecordType>()) { - if (IsReturnType) { - if (isRecordReturnIndirect(RT, getCXXABI())) - return ABIArgInfo::getIndirect(0, false); - } else { + const RecordType *RT = Ty->getAs<RecordType>(); + if (RT) { + if (!IsReturnType) { if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI())) return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory); } @@ -2643,18 +2738,27 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, bool IsReturnType) const { return ABIArgInfo::getIndirect(0, /*ByVal=*/false); // FIXME: mingw-w64-gcc emits 128-bit struct as i128 - if (Size == 128 && getTarget().getTriple().getOS() == llvm::Triple::MinGW32) + if (Size == 128 && getTarget().getTriple().isWindowsGNUEnvironment()) return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); + } + if (Ty->isMemberPointerType()) { + // If the member pointer is represented by an LLVM int or ptr, pass it + // directly. + llvm::Type *LLTy = CGT.ConvertType(Ty); + if (LLTy->isPointerTy() || LLTy->isIntegerTy()) + return ABIArgInfo::getDirect(); + } + + if (RT || Ty->isMemberPointerType()) { // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is // not 1, 2, 4, or 8 bytes, must be passed by reference." - if (Size <= 64 && - (Size & (Size - 1)) == 0) - return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), - Size)); + if (Size > 64 || !llvm::isPowerOf2_64(Size)) + return ABIArgInfo::getIndirect(0, /*ByVal=*/false); - return ABIArgInfo::getIndirect(0, /*ByVal=*/false); + // Otherwise, coerce it to a small integer. 
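Examples for the MS x64 rule applied above (sizes of 1, 2, 4, or 8 bytes pass directly, anything else goes by reference):

    struct S4  { int a; };         // 4 bytes  -> passed directly (coerced to i32)
    struct S8  { int a, b; };      // 8 bytes  -> passed directly (coerced to i64)
    struct S3  { char a, b, c; };  // 3 bytes  -> not a power of two: by reference
    struct S16 { double a, b; };   // 16 bytes -> larger than 8 bytes: by reference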
+ return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); } if (Ty->isPromotableIntegerType()) @@ -2664,13 +2768,11 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, bool IsReturnType) const { } void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classify(FI.getReturnType(), true); - QualType RetTy = FI.getReturnType(); - FI.getReturnInfo() = classify(RetTy, true); - - for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); - it != ie; ++it) - it->info = classify(it->type, false); + for (auto &I : FI.arguments()) + I.info = classify(I.type, false); } llvm::Value *WinX86_64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, @@ -2701,9 +2803,9 @@ class NaClX86_64ABIInfo : public ABIInfo { public: NaClX86_64ABIInfo(CodeGen::CodeGenTypes &CGT, bool HasAVX) : ABIInfo(CGT), PInfo(CGT), NInfo(CGT, HasAVX) {} - virtual void computeInfo(CGFunctionInfo &FI) const; - virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CGF) const; + void computeInfo(CGFunctionInfo &FI) const override; + llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const override; private: PNaClABIInfo PInfo; // Used for generating calls with pnaclcall callingconv. X86_64ABIInfo NInfo; // Used for everything else. @@ -2739,13 +2841,13 @@ class PPC32TargetCodeGenInfo : public DefaultTargetCodeGenInfo { public: PPC32TargetCodeGenInfo(CodeGenTypes &CGT) : DefaultTargetCodeGenInfo(CGT) {} - int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const { + int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. return 1; // r1 is the dedicated stack pointer } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const; + llvm::Value *Address) const override; }; } @@ -2796,11 +2898,24 @@ PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, namespace { /// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information. class PPC64_SVR4_ABIInfo : public DefaultABIInfo { +public: + enum ABIKind { + ELFv1 = 0, + ELFv2 + }; + +private: + static const unsigned GPRBits = 64; + ABIKind Kind; public: - PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind) + : DefaultABIInfo(CGT), Kind(Kind) {} bool isPromotableTypeForABI(QualType Ty) const; + bool isAlignedParamType(QualType Ty) const; + bool isHomogeneousAggregate(QualType Ty, const Type *&Base, + uint64_t &Members) const; ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType Ty) const; @@ -2811,56 +2926,57 @@ public: // floating-point value) to avoid pushing them to memory on function // entry. This would require changing the logic in PPCISelLowering // when lowering the parameters in the caller and args in the callee. - virtual void computeInfo(CGFunctionInfo &FI) const { - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); - it != ie; ++it) { + void computeInfo(CGFunctionInfo &FI) const override { + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) { // We rely on the default argument classification for the most part. 
// One exception: An aggregate containing a single floating-point // or vector item must be passed in a register if one is available. - const Type *T = isSingleElementStruct(it->type, getContext()); + const Type *T = isSingleElementStruct(I.type, getContext()); if (T) { const BuiltinType *BT = T->getAs<BuiltinType>(); - if (T->isVectorType() || (BT && BT->isFloatingPoint())) { + if ((T->isVectorType() && getContext().getTypeSize(T) == 128) || + (BT && BT->isFloatingPoint())) { QualType QT(T, 0); - it->info = ABIArgInfo::getDirectInReg(CGT.ConvertType(QT)); + I.info = ABIArgInfo::getDirectInReg(CGT.ConvertType(QT)); continue; } } - it->info = classifyArgumentType(it->type); + I.info = classifyArgumentType(I.type); } } - virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, - QualType Ty, - CodeGenFunction &CGF) const; + llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const override; }; class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo { public: - PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT)) {} + PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT, + PPC64_SVR4_ABIInfo::ABIKind Kind) + : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind)) {} - int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const { + int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. return 1; // r1 is the dedicated stack pointer } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const; + llvm::Value *Address) const override; }; class PPC64TargetCodeGenInfo : public DefaultTargetCodeGenInfo { public: PPC64TargetCodeGenInfo(CodeGenTypes &CGT) : DefaultTargetCodeGenInfo(CGT) {} - int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const { + int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. return 1; // r1 is the dedicated stack pointer } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const; + llvm::Value *Address) const override; }; } @@ -2891,16 +3007,205 @@ PPC64_SVR4_ABIInfo::isPromotableTypeForABI(QualType Ty) const { return false; } +/// isAlignedParamType - Determine whether a type requires 16-byte +/// alignment in the parameter area. +bool +PPC64_SVR4_ABIInfo::isAlignedParamType(QualType Ty) const { + // Complex types are passed just like their elements. + if (const ComplexType *CTy = Ty->getAs<ComplexType>()) + Ty = CTy->getElementType(); + + // Only vector types of size 16 bytes need alignment (larger types are + // passed via reference, smaller types are not aligned). + if (Ty->isVectorType()) + return getContext().getTypeSize(Ty) == 128; + + // For single-element float/vector structs, we consider the whole type + // to have the same alignment requirements as its single element. + const Type *AlignAsType = nullptr; + const Type *EltType = isSingleElementStruct(Ty, getContext()); + if (EltType) { + const BuiltinType *BT = EltType->getAs<BuiltinType>(); + if ((EltType->isVectorType() && + getContext().getTypeSize(EltType) == 128) || + (BT && BT->isFloatingPoint())) + AlignAsType = EltType; + } + + // Likewise for ELFv2 homogeneous aggregates. + const Type *Base = nullptr; + uint64_t Members = 0; + if (!AlignAsType && Kind == ELFv2 && + isAggregateTypeForABI(Ty) && isHomogeneousAggregate(Ty, Base, Members)) + AlignAsType = Base; + + // With special case aggregates, only vector base types need alignment. 
+ if (AlignAsType) + return AlignAsType->isVectorType(); + + // Otherwise, we only need alignment for any aggregate type that + // has an alignment requirement of >= 16 bytes. + if (isAggregateTypeForABI(Ty) && getContext().getTypeAlign(Ty) >= 128) + return true; + + return false; +} + +/// isHomogeneousAggregate - Return true if a type is an ELFv2 homogeneous +/// aggregate. Base is set to the base element type, and Members is set +/// to the number of base elements. +bool +PPC64_SVR4_ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base, + uint64_t &Members) const { + if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) { + uint64_t NElements = AT->getSize().getZExtValue(); + if (NElements == 0) + return false; + if (!isHomogeneousAggregate(AT->getElementType(), Base, Members)) + return false; + Members *= NElements; + } else if (const RecordType *RT = Ty->getAs<RecordType>()) { + const RecordDecl *RD = RT->getDecl(); + if (RD->hasFlexibleArrayMember()) + return false; + + Members = 0; + for (const auto *FD : RD->fields()) { + // Ignore (non-zero arrays of) empty records. + QualType FT = FD->getType(); + while (const ConstantArrayType *AT = + getContext().getAsConstantArrayType(FT)) { + if (AT->getSize().getZExtValue() == 0) + return false; + FT = AT->getElementType(); + } + if (isEmptyRecord(getContext(), FT, true)) + continue; + + // For compatibility with GCC, ignore empty bitfields in C++ mode. + if (getContext().getLangOpts().CPlusPlus && + FD->isBitField() && FD->getBitWidthValue(getContext()) == 0) + continue; + + uint64_t FldMembers; + if (!isHomogeneousAggregate(FD->getType(), Base, FldMembers)) + return false; + + Members = (RD->isUnion() ? + std::max(Members, FldMembers) : Members + FldMembers); + } + + if (!Base) + return false; + + // Ensure there is no padding. + if (getContext().getTypeSize(Base) * Members != + getContext().getTypeSize(Ty)) + return false; + } else { + Members = 1; + if (const ComplexType *CT = Ty->getAs<ComplexType>()) { + Members = 2; + Ty = CT->getElementType(); + } + + // Homogeneous aggregates for ELFv2 must have base types of float, + // double, long double, or 128-bit vectors. + if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { + if (BT->getKind() != BuiltinType::Float && + BT->getKind() != BuiltinType::Double && + BT->getKind() != BuiltinType::LongDouble) + return false; + } else if (const VectorType *VT = Ty->getAs<VectorType>()) { + if (getContext().getTypeSize(VT) != 128) + return false; + } else { + return false; + } + + // The base type must be the same for all members. Types that + // agree in both total size and mode (float vs. vector) are + // treated as being equivalent here. + const Type *TyPtr = Ty.getTypePtr(); + if (!Base) + Base = TyPtr; + + if (Base->isVectorType() != TyPtr->isVectorType() || + getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr)) + return false; + } + + // Vector types require one register, floating point types require one + // or two registers depending on their size. + uint32_t NumRegs = Base->isVectorType() ? 1 : + (getContext().getTypeSize(Base) + 63) / 64; + + // Homogeneous Aggregates may occupy at most 8 registers. + return (Members > 0 && Members * NumRegs <= 8); +} + ABIArgInfo PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const { if (Ty->isAnyComplexType()) return ABIArgInfo::getDirect(); + // Non-Altivec vector types are passed in GPRs (smaller than 16 bytes) + // or via reference (larger than 16 bytes). 
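A few source-level cases for the isHomogeneousAggregate logic above, where the base type must be float, double, long double, or a 128-bit vector, and the whole aggregate must fit in at most 8 registers. These are illustrative, not taken from the patch:

    struct HFA2   { double x, y; };       // HA: base double, 2 members
    struct HFA8   { float f[8]; };        // HA: base float, exactly at the 8-register limit
    struct Nested { HFA2 a; double z; };  // HA: flattens to 3 doubles
    struct Not1   { float f; int i; };    // mixed base types -> not an HA
    struct Not2   { double d[9]; };       // 9 members -> exceeds 8 registers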
+ if (Ty->isVectorType()) { + uint64_t Size = getContext().getTypeSize(Ty); + if (Size > 128) + return ABIArgInfo::getIndirect(0, /*ByVal=*/false); + else if (Size < 128) { + llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size); + return ABIArgInfo::getDirect(CoerceTy); + } + } + if (isAggregateTypeForABI(Ty)) { if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory); - return ABIArgInfo::getIndirect(0); + uint64_t ABIAlign = isAlignedParamType(Ty)? 16 : 8; + uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8; + + // ELFv2 homogeneous aggregates are passed as array types. + const Type *Base = nullptr; + uint64_t Members = 0; + if (Kind == ELFv2 && + isHomogeneousAggregate(Ty, Base, Members)) { + llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0)); + llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members); + return ABIArgInfo::getDirect(CoerceTy); + } + + // If an aggregate may end up fully in registers, we do not + // use the ByVal method, but pass the aggregate as array. + // This is usually beneficial since we avoid forcing the + // back-end to store the argument to memory. + uint64_t Bits = getContext().getTypeSize(Ty); + if (Bits > 0 && Bits <= 8 * GPRBits) { + llvm::Type *CoerceTy; + + // Types up to 8 bytes are passed as integer type (which will be + // properly aligned in the argument save area doubleword). + if (Bits <= GPRBits) + CoerceTy = llvm::IntegerType::get(getVMContext(), + llvm::RoundUpToAlignment(Bits, 8)); + // Larger types are passed as arrays, with the base type selected + // according to the required alignment in the save area. + else { + uint64_t RegBits = ABIAlign * 8; + uint64_t NumRegs = llvm::RoundUpToAlignment(Bits, RegBits) / RegBits; + llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), RegBits); + CoerceTy = llvm::ArrayType::get(RegTy, NumRegs); + } + + return ABIArgInfo::getDirect(CoerceTy); + } + + // All other aggregates are passed ByVal. + return ABIArgInfo::getIndirect(ABIAlign, /*ByVal=*/true, + /*Realign=*/TyAlign > ABIAlign); } return (isPromotableTypeForABI(Ty) ? @@ -2915,8 +3220,48 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isAnyComplexType()) return ABIArgInfo::getDirect(); - if (isAggregateTypeForABI(RetTy)) + // Non-Altivec vector types are returned in GPRs (smaller than 16 bytes) + // or via reference (larger than 16 bytes). + if (RetTy->isVectorType()) { + uint64_t Size = getContext().getTypeSize(RetTy); + if (Size > 128) + return ABIArgInfo::getIndirect(0); + else if (Size < 128) { + llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size); + return ABIArgInfo::getDirect(CoerceTy); + } + } + + if (isAggregateTypeForABI(RetTy)) { + // ELFv2 homogeneous aggregates are returned as array types. + const Type *Base = nullptr; + uint64_t Members = 0; + if (Kind == ELFv2 && + isHomogeneousAggregate(RetTy, Base, Members)) { + llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0)); + llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members); + return ABIArgInfo::getDirect(CoerceTy); + } + + // ELFv2 small aggregates are returned in up to two registers. 
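Illustrative ELFv2 argument classifications following the code above (GPRBits is 64; these structs are not homogeneous aggregates and have natural alignment of 8 bytes or less):

    struct Pair { int a, b; };      // 64 bits  -> coerced to a single i64
    struct Mid  { long a, b, c; };  // 192 bits -> passed as [3 x i64], no byval copy
    struct Huge { long a[9]; };     // 576 bits -> more than 8 GPRs: passed byval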
+ uint64_t Bits = getContext().getTypeSize(RetTy); + if (Kind == ELFv2 && Bits <= 2 * GPRBits) { + if (Bits == 0) + return ABIArgInfo::getIgnore(); + + llvm::Type *CoerceTy; + if (Bits > GPRBits) { + CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits); + CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy, NULL); + } else + CoerceTy = llvm::IntegerType::get(getVMContext(), + llvm::RoundUpToAlignment(Bits, 8)); + return ABIArgInfo::getDirect(CoerceTy); + } + + // All other aggregates are returned indirectly. return ABIArgInfo::getIndirect(0); + } return (isPromotableTypeForABI(RetTy) ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); @@ -2933,6 +3278,14 @@ llvm::Value *PPC64_SVR4_ABIInfo::EmitVAArg(llvm::Value *VAListAddr, llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP, "ap"); llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur"); + // Handle types that require 16-byte alignment in the parameter save area. + if (isAlignedParamType(Ty)) { + llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int64Ty); + AddrAsInt = Builder.CreateAdd(AddrAsInt, Builder.getInt64(15)); + AddrAsInt = Builder.CreateAnd(AddrAsInt, Builder.getInt64(-16)); + Addr = Builder.CreateIntToPtr(AddrAsInt, BP, "ap.align"); + } + // Update the va_list pointer. The pointer should be bumped by the // size of the object. We can trust getTypeSize() except for a complex // type whose base type is smaller than a doubleword. For these, the @@ -2963,8 +3316,12 @@ llvm::Value *PPC64_SVR4_ABIInfo::EmitVAArg(llvm::Value *VAListAddr, if (CplxBaseSize && CplxBaseSize < 8) { llvm::Value *RealAddr = Builder.CreatePtrToInt(Addr, CGF.Int64Ty); llvm::Value *ImagAddr = RealAddr; - RealAddr = Builder.CreateAdd(RealAddr, Builder.getInt64(8 - CplxBaseSize)); - ImagAddr = Builder.CreateAdd(ImagAddr, Builder.getInt64(16 - CplxBaseSize)); + if (CGF.CGM.getDataLayout().isBigEndian()) { + RealAddr = Builder.CreateAdd(RealAddr, Builder.getInt64(8 - CplxBaseSize)); + ImagAddr = Builder.CreateAdd(ImagAddr, Builder.getInt64(16 - CplxBaseSize)); + } else { + ImagAddr = Builder.CreateAdd(ImagAddr, Builder.getInt64(8)); + } llvm::Type *PBaseTy = llvm::PointerType::getUnqual(CGF.ConvertType(BaseTy)); RealAddr = Builder.CreateIntToPtr(RealAddr, PBaseTy); ImagAddr = Builder.CreateIntToPtr(ImagAddr, PBaseTy); @@ -2982,7 +3339,7 @@ llvm::Value *PPC64_SVR4_ABIInfo::EmitVAArg(llvm::Value *VAListAddr, // If the argument is smaller than 8 bytes, it is right-adjusted in // its doubleword slot. Adjust the pointer to pick it up from the // correct offset. 
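  // Worked example (not from the patch): on big-endian PPC64 a 2-byte
  // argument occupies the last two bytes of its 8-byte slot, so the load
  // address below becomes Addr + (8 - 2) = Addr + 6; on little-endian ELFv2
  // the value already sits at offset 0 and no adjustment is needed.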
- if (SizeInBytes < 8) { + if (SizeInBytes < 8 && CGF.CGM.getDataLayout().isBigEndian()) { llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int64Ty); AddrAsInt = Builder.CreateAdd(AddrAsInt, Builder.getInt64(8 - SizeInBytes)); Addr = Builder.CreateIntToPtr(AddrAsInt, BP); @@ -3049,6 +3406,598 @@ PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, } //===----------------------------------------------------------------------===// +// AArch64 ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { + +class AArch64ABIInfo : public ABIInfo { +public: + enum ABIKind { + AAPCS = 0, + DarwinPCS + }; + +private: + ABIKind Kind; + +public: + AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind) : ABIInfo(CGT), Kind(Kind) {} + +private: + ABIKind getABIKind() const { return Kind; } + bool isDarwinPCS() const { return Kind == DarwinPCS; } + + ABIArgInfo classifyReturnType(QualType RetTy) const; + ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP, + bool &IsHA, unsigned &AllocatedGPR, + bool &IsSmallAggr, bool IsNamedArg) const; + bool isIllegalVectorType(QualType Ty) const; + + virtual void computeInfo(CGFunctionInfo &FI) const { + // To correctly handle Homogeneous Aggregate, we need to keep track of the + // number of SIMD and Floating-point registers allocated so far. + // If the argument is an HFA or an HVA and there are sufficient unallocated + // SIMD and Floating-point registers, then the argument is allocated to SIMD + // and Floating-point Registers (with one register per member of the HFA or + // HVA). Otherwise, the NSRN is set to 8. + unsigned AllocatedVFP = 0; + + // To correctly handle small aggregates, we need to keep track of the number + // of GPRs allocated so far. If the small aggregate can't all fit into + // registers, it will be on stack. We don't allow the aggregate to be + // partially in registers. + unsigned AllocatedGPR = 0; + + // Find the number of named arguments. Variadic arguments get special + // treatment with the Darwin ABI. + unsigned NumRequiredArgs = (FI.isVariadic() ? + FI.getRequiredArgs().getNumRequiredArgs() : + FI.arg_size()); + + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); + it != ie; ++it) { + unsigned PreAllocation = AllocatedVFP, PreGPR = AllocatedGPR; + bool IsHA = false, IsSmallAggr = false; + const unsigned NumVFPs = 8; + const unsigned NumGPRs = 8; + bool IsNamedArg = ((it - FI.arg_begin()) < + static_cast<signed>(NumRequiredArgs)); + it->info = classifyArgumentType(it->type, AllocatedVFP, IsHA, + AllocatedGPR, IsSmallAggr, IsNamedArg); + + // Under AAPCS the 64-bit stack slot alignment means we can't pass HAs + // as sequences of floats since they'll get "holes" inserted as + // padding by the back end. + if (IsHA && AllocatedVFP > NumVFPs && !isDarwinPCS() && + getContext().getTypeAlign(it->type) < 64) { + uint32_t NumStackSlots = getContext().getTypeSize(it->type); + NumStackSlots = llvm::RoundUpToAlignment(NumStackSlots, 64) / 64; + + llvm::Type *CoerceTy = llvm::ArrayType::get( + llvm::Type::getDoubleTy(getVMContext()), NumStackSlots); + it->info = ABIArgInfo::getDirect(CoerceTy); + } + + // If we do not have enough VFP registers for the HA, any VFP registers + // that are unallocated are marked as unavailable. To achieve this, we add + // padding of (NumVFPs - PreAllocation) floats. 
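  // Worked example (not from the patch): if 6 of the 8 SIMD registers are
  // already taken (PreAllocation == 6) and the next HFA needs 4 doubles,
  // AllocatedVFP becomes 10 > 8, so the HFA goes to memory and a
  // [2 x float] padding type is attached to mark the remaining
  // NumVFPs - PreAllocation = 2 registers as unavailable.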
+ if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) { + llvm::Type *PaddingTy = llvm::ArrayType::get( + llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation); + it->info.setPaddingType(PaddingTy); + } + + // If we do not have enough GPRs for the small aggregate, any GPR regs + // that are unallocated are marked as unavailable. + if (IsSmallAggr && AllocatedGPR > NumGPRs && PreGPR < NumGPRs) { + llvm::Type *PaddingTy = llvm::ArrayType::get( + llvm::Type::getInt32Ty(getVMContext()), NumGPRs - PreGPR); + it->info = + ABIArgInfo::getDirect(it->info.getCoerceToType(), 0, PaddingTy); + } + } + } + + llvm::Value *EmitDarwinVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const; + + llvm::Value *EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const; + + virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const { + return isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF) + : EmitAAPCSVAArg(VAListAddr, Ty, CGF); + } +}; + +class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { +public: + AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind Kind) + : TargetCodeGenInfo(new AArch64ABIInfo(CGT, Kind)) {} + + StringRef getARCRetainAutoreleasedReturnValueMarker() const { + return "mov\tfp, fp\t\t; marker for objc_retainAutoreleaseReturnValue"; + } + + int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const { return 31; } + + virtual bool doesReturnSlotInterfereWithArgs() const { return false; } +}; +} + +static bool isHomogeneousAggregate(QualType Ty, const Type *&Base, + ASTContext &Context, + uint64_t *HAMembers = nullptr); + +ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, + unsigned &AllocatedVFP, + bool &IsHA, + unsigned &AllocatedGPR, + bool &IsSmallAggr, + bool IsNamedArg) const { + // Handle illegal vector types here. + if (isIllegalVectorType(Ty)) { + uint64_t Size = getContext().getTypeSize(Ty); + if (Size <= 32) { + llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext()); + AllocatedGPR++; + return ABIArgInfo::getDirect(ResType); + } + if (Size == 64) { + llvm::Type *ResType = + llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2); + AllocatedVFP++; + return ABIArgInfo::getDirect(ResType); + } + if (Size == 128) { + llvm::Type *ResType = + llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4); + AllocatedVFP++; + return ABIArgInfo::getDirect(ResType); + } + AllocatedGPR++; + return ABIArgInfo::getIndirect(0, /*ByVal=*/false); + } + if (Ty->isVectorType()) + // Size of a legal vector should be either 64 or 128. + AllocatedVFP++; + if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { + if (BT->getKind() == BuiltinType::Half || + BT->getKind() == BuiltinType::Float || + BT->getKind() == BuiltinType::Double || + BT->getKind() == BuiltinType::LongDouble) + AllocatedVFP++; + } + + if (!isAggregateTypeForABI(Ty)) { + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + if (!Ty->isFloatingType() && !Ty->isVectorType()) { + unsigned Alignment = getContext().getTypeAlign(Ty); + if (!isDarwinPCS() && Alignment > 64) + AllocatedGPR = llvm::RoundUpToAlignment(AllocatedGPR, Alignment / 64); + + int RegsNeeded = getContext().getTypeSize(Ty) > 64 ? 2 : 1; + AllocatedGPR += RegsNeeded; + } + return (Ty->isPromotableIntegerType() && isDarwinPCS() + ? 
ABIArgInfo::getExtend() + : ABIArgInfo::getDirect()); + } + + // Structures with either a non-trivial destructor or a non-trivial + // copy constructor are always indirect. + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { + AllocatedGPR++; + return ABIArgInfo::getIndirect(0, /*ByVal=*/RAA == + CGCXXABI::RAA_DirectInMemory); + } + + // Empty records are always ignored on Darwin, but actually passed in C++ mode + // elsewhere for GNU compatibility. + if (isEmptyRecord(getContext(), Ty, true)) { + if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS()) + return ABIArgInfo::getIgnore(); + + ++AllocatedGPR; + return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); + } + + // Homogeneous Floating-point Aggregates (HFAs) need to be expanded. + const Type *Base = nullptr; + uint64_t Members = 0; + if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) { + IsHA = true; + if (!IsNamedArg && isDarwinPCS()) { + // With the Darwin ABI, variadic arguments are always passed on the stack + // and should not be expanded. Treat variadic HFAs as arrays of doubles. + uint64_t Size = getContext().getTypeSize(Ty); + llvm::Type *BaseTy = llvm::Type::getDoubleTy(getVMContext()); + return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64)); + } + AllocatedVFP += Members; + return ABIArgInfo::getExpand(); + } + + // Aggregates <= 16 bytes are passed directly in registers or on the stack. + uint64_t Size = getContext().getTypeSize(Ty); + if (Size <= 128) { + unsigned Alignment = getContext().getTypeAlign(Ty); + if (!isDarwinPCS() && Alignment > 64) + AllocatedGPR = llvm::RoundUpToAlignment(AllocatedGPR, Alignment / 64); + + Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes + AllocatedGPR += Size / 64; + IsSmallAggr = true; + // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. + // For aggregates with 16-byte alignment, we use i128. + if (Alignment < 128 && Size == 128) { + llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext()); + return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64)); + } + return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); + } + + AllocatedGPR++; + return ABIArgInfo::getIndirect(0, /*ByVal=*/false); +} + +ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + + // Large vector types should be returned via memory. + if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) + return ABIArgInfo::getIndirect(0); + + if (!isAggregateTypeForABI(RetTy)) { + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) + RetTy = EnumTy->getDecl()->getIntegerType(); + + return (RetTy->isPromotableIntegerType() && isDarwinPCS() + ? ABIArgInfo::getExtend() + : ABIArgInfo::getDirect()); + } + + if (isEmptyRecord(getContext(), RetTy, true)) + return ABIArgInfo::getIgnore(); + + const Type *Base = nullptr; + if (isHomogeneousAggregate(RetTy, Base, getContext())) + // Homogeneous Floating-point Aggregates (HFAs) are returned directly. + return ABIArgInfo::getDirect(); + + // Aggregates <= 16 bytes are returned directly in registers or on the stack. 
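  // Illustrative sketch (not part of this patch): a 12-byte return type such
  // as
  //   struct S { int a, b, c; };   // 96 bits
  // is rounded up below to 128 bits and returned as i128 (x0/x1), while
  // anything over 16 bytes that is not an HFA is returned indirectly via the
  // x8 result pointer.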
+ uint64_t Size = getContext().getTypeSize(RetTy); + if (Size <= 128) { + Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes + return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); + } + + return ABIArgInfo::getIndirect(0); +} + +/// isIllegalVectorType - check whether the vector type is legal for AArch64. +bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const { + if (const VectorType *VT = Ty->getAs<VectorType>()) { + // Check whether VT is legal. + unsigned NumElements = VT->getNumElements(); + uint64_t Size = getContext().getTypeSize(VT); + // NumElements should be power of 2 between 1 and 16. + if ((NumElements & (NumElements - 1)) != 0 || NumElements > 16) + return true; + return Size != 64 && (Size != 128 || NumElements == 1); + } + return false; +} + +static llvm::Value *EmitAArch64VAArg(llvm::Value *VAListAddr, QualType Ty, + int AllocatedGPR, int AllocatedVFP, + bool IsIndirect, CodeGenFunction &CGF) { + // The AArch64 va_list type and handling is specified in the Procedure Call + // Standard, section B.4: + // + // struct { + // void *__stack; + // void *__gr_top; + // void *__vr_top; + // int __gr_offs; + // int __vr_offs; + // }; + + llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg"); + llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); + llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack"); + llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); + auto &Ctx = CGF.getContext(); + + llvm::Value *reg_offs_p = nullptr, *reg_offs = nullptr; + int reg_top_index; + int RegSize; + if (AllocatedGPR) { + assert(!AllocatedVFP && "Arguments never split between int & VFP regs"); + // 3 is the field number of __gr_offs + reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p"); + reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs"); + reg_top_index = 1; // field number for __gr_top + RegSize = 8 * AllocatedGPR; + } else { + assert(!AllocatedGPR && "Argument must go in VFP or int regs"); + // 4 is the field number of __vr_offs. + reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p"); + reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs"); + reg_top_index = 2; // field number for __vr_top + RegSize = 16 * AllocatedVFP; + } + + //======================================= + // Find out where argument was passed + //======================================= + + // If reg_offs >= 0 we're already using the stack for this type of + // argument. We don't want to keep updating reg_offs (in case it overflows, + // though anyone passing 2GB of arguments, each at most 16 bytes, deserves + // whatever they get). + llvm::Value *UsingStack = nullptr; + UsingStack = CGF.Builder.CreateICmpSGE( + reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, 0)); + + CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock); + + // Otherwise, at least some kind of argument could go in these registers, the + // question is whether this particular type is too big. + CGF.EmitBlock(MaybeRegBlock); + + // Integer arguments may need to correct register alignment (for example a + // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we + // align __gr_offs to calculate the potential address. 
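  // Worked example (not from the patch): __gr_offs is a negative byte offset
  // from __gr_top, so for a 16-byte aligned __int128 with __gr_offs == -56
  // the rounding below computes (-56 + 15) & -16 == -48, skipping the
  // odd-numbered register so the value starts at an even x_2N as required.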
+ if (AllocatedGPR && !IsIndirect && Ctx.getTypeAlign(Ty) > 64) { + int Align = Ctx.getTypeAlign(Ty) / 8; + + reg_offs = CGF.Builder.CreateAdd( + reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, Align - 1), + "align_regoffs"); + reg_offs = CGF.Builder.CreateAnd( + reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, -Align), + "aligned_regoffs"); + } + + // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list. + llvm::Value *NewOffset = nullptr; + NewOffset = CGF.Builder.CreateAdd( + reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, RegSize), "new_reg_offs"); + CGF.Builder.CreateStore(NewOffset, reg_offs_p); + + // Now we're in a position to decide whether this argument really was in + // registers or not. + llvm::Value *InRegs = nullptr; + InRegs = CGF.Builder.CreateICmpSLE( + NewOffset, llvm::ConstantInt::get(CGF.Int32Ty, 0), "inreg"); + + CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock); + + //======================================= + // Argument was in registers + //======================================= + + // Now we emit the code for if the argument was originally passed in + // registers. First start the appropriate block: + CGF.EmitBlock(InRegBlock); + + llvm::Value *reg_top_p = nullptr, *reg_top = nullptr; + reg_top_p = + CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p"); + reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top"); + llvm::Value *BaseAddr = CGF.Builder.CreateGEP(reg_top, reg_offs); + llvm::Value *RegAddr = nullptr; + llvm::Type *MemTy = llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty)); + + if (IsIndirect) { + // If it's been passed indirectly (actually a struct), whatever we find from + // stored registers or on the stack will actually be a struct **. + MemTy = llvm::PointerType::getUnqual(MemTy); + } + + const Type *Base = nullptr; + uint64_t NumMembers; + bool IsHFA = isHomogeneousAggregate(Ty, Base, Ctx, &NumMembers); + if (IsHFA && NumMembers > 1) { + // Homogeneous aggregates passed in registers will have their elements split + // and stored 16-bytes apart regardless of size (they're notionally in qN, + // qN+1, ...). We reload and store into a temporary local variable + // contiguously. + assert(!IsIndirect && "Homogeneous aggregates should be passed directly"); + llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0)); + llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers); + llvm::Value *Tmp = CGF.CreateTempAlloca(HFATy); + int Offset = 0; + + if (CGF.CGM.getDataLayout().isBigEndian() && Ctx.getTypeSize(Base) < 128) + Offset = 16 - Ctx.getTypeSize(Base) / 8; + for (unsigned i = 0; i < NumMembers; ++i) { + llvm::Value *BaseOffset = + llvm::ConstantInt::get(CGF.Int32Ty, 16 * i + Offset); + llvm::Value *LoadAddr = CGF.Builder.CreateGEP(BaseAddr, BaseOffset); + LoadAddr = CGF.Builder.CreateBitCast( + LoadAddr, llvm::PointerType::getUnqual(BaseTy)); + llvm::Value *StoreAddr = CGF.Builder.CreateStructGEP(Tmp, i); + + llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr); + CGF.Builder.CreateStore(Elem, StoreAddr); + } + + RegAddr = CGF.Builder.CreateBitCast(Tmp, MemTy); + } else { + // Otherwise the object is contiguous in memory + unsigned BeAlign = reg_top_index == 2 ? 
16 : 8; + if (CGF.CGM.getDataLayout().isBigEndian() && + (IsHFA || !isAggregateTypeForABI(Ty)) && + Ctx.getTypeSize(Ty) < (BeAlign * 8)) { + int Offset = BeAlign - Ctx.getTypeSize(Ty) / 8; + BaseAddr = CGF.Builder.CreatePtrToInt(BaseAddr, CGF.Int64Ty); + + BaseAddr = CGF.Builder.CreateAdd( + BaseAddr, llvm::ConstantInt::get(CGF.Int64Ty, Offset), "align_be"); + + BaseAddr = CGF.Builder.CreateIntToPtr(BaseAddr, CGF.Int8PtrTy); + } + + RegAddr = CGF.Builder.CreateBitCast(BaseAddr, MemTy); + } + + CGF.EmitBranch(ContBlock); + + //======================================= + // Argument was on the stack + //======================================= + CGF.EmitBlock(OnStackBlock); + + llvm::Value *stack_p = nullptr, *OnStackAddr = nullptr; + stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p"); + OnStackAddr = CGF.Builder.CreateLoad(stack_p, "stack"); + + // Again, stack arguments may need realigmnent. In this case both integer and + // floating-point ones might be affected. + if (!IsIndirect && Ctx.getTypeAlign(Ty) > 64) { + int Align = Ctx.getTypeAlign(Ty) / 8; + + OnStackAddr = CGF.Builder.CreatePtrToInt(OnStackAddr, CGF.Int64Ty); + + OnStackAddr = CGF.Builder.CreateAdd( + OnStackAddr, llvm::ConstantInt::get(CGF.Int64Ty, Align - 1), + "align_stack"); + OnStackAddr = CGF.Builder.CreateAnd( + OnStackAddr, llvm::ConstantInt::get(CGF.Int64Ty, -Align), + "align_stack"); + + OnStackAddr = CGF.Builder.CreateIntToPtr(OnStackAddr, CGF.Int8PtrTy); + } + + uint64_t StackSize; + if (IsIndirect) + StackSize = 8; + else + StackSize = Ctx.getTypeSize(Ty) / 8; + + // All stack slots are 8 bytes + StackSize = llvm::RoundUpToAlignment(StackSize, 8); + + llvm::Value *StackSizeC = llvm::ConstantInt::get(CGF.Int32Ty, StackSize); + llvm::Value *NewStack = + CGF.Builder.CreateGEP(OnStackAddr, StackSizeC, "new_stack"); + + // Write the new value of __stack for the next call to va_arg + CGF.Builder.CreateStore(NewStack, stack_p); + + if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(Ty) && + Ctx.getTypeSize(Ty) < 64) { + int Offset = 8 - Ctx.getTypeSize(Ty) / 8; + OnStackAddr = CGF.Builder.CreatePtrToInt(OnStackAddr, CGF.Int64Ty); + + OnStackAddr = CGF.Builder.CreateAdd( + OnStackAddr, llvm::ConstantInt::get(CGF.Int64Ty, Offset), "align_be"); + + OnStackAddr = CGF.Builder.CreateIntToPtr(OnStackAddr, CGF.Int8PtrTy); + } + + OnStackAddr = CGF.Builder.CreateBitCast(OnStackAddr, MemTy); + + CGF.EmitBranch(ContBlock); + + //======================================= + // Tidy up + //======================================= + CGF.EmitBlock(ContBlock); + + llvm::PHINode *ResAddr = CGF.Builder.CreatePHI(MemTy, 2, "vaarg.addr"); + ResAddr->addIncoming(RegAddr, InRegBlock); + ResAddr->addIncoming(OnStackAddr, OnStackBlock); + + if (IsIndirect) + return CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"); + + return ResAddr; +} + +llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const { + + unsigned AllocatedGPR = 0, AllocatedVFP = 0; + bool IsHA = false, IsSmallAggr = false; + ABIArgInfo AI = classifyArgumentType(Ty, AllocatedVFP, IsHA, AllocatedGPR, + IsSmallAggr, false /*IsNamedArg*/); + + return EmitAArch64VAArg(VAListAddr, Ty, AllocatedGPR, AllocatedVFP, + AI.isIndirect(), CGF); +} + +llvm::Value *AArch64ABIInfo::EmitDarwinVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const { + // We do not support va_arg for aggregates or illegal vector types. 
+ // Lower VAArg here for these cases and use the LLVM va_arg instruction for + // other cases. + if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty)) + return nullptr; + + uint64_t Size = CGF.getContext().getTypeSize(Ty) / 8; + uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8; + + const Type *Base = nullptr; + bool isHA = isHomogeneousAggregate(Ty, Base, getContext()); + + bool isIndirect = false; + // Arguments bigger than 16 bytes which aren't homogeneous aggregates should + // be passed indirectly. + if (Size > 16 && !isHA) { + isIndirect = true; + Size = 8; + Align = 8; + } + + llvm::Type *BP = llvm::Type::getInt8PtrTy(CGF.getLLVMContext()); + llvm::Type *BPP = llvm::PointerType::getUnqual(BP); + + CGBuilderTy &Builder = CGF.Builder; + llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP, "ap"); + llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur"); + + if (isEmptyRecord(getContext(), Ty, true)) { + // These are ignored for parameter passing purposes. + llvm::Type *PTy = llvm::PointerType::getUnqual(CGF.ConvertType(Ty)); + return Builder.CreateBitCast(Addr, PTy); + } + + const uint64_t MinABIAlign = 8; + if (Align > MinABIAlign) { + llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, Align - 1); + Addr = Builder.CreateGEP(Addr, Offset); + llvm::Value *AsInt = Builder.CreatePtrToInt(Addr, CGF.Int64Ty); + llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int64Ty, ~(Align - 1)); + llvm::Value *Aligned = Builder.CreateAnd(AsInt, Mask); + Addr = Builder.CreateIntToPtr(Aligned, BP, "ap.align"); + } + + uint64_t Offset = llvm::RoundUpToAlignment(Size, MinABIAlign); + llvm::Value *NextAddr = Builder.CreateGEP( + Addr, llvm::ConstantInt::get(CGF.Int32Ty, Offset), "ap.next"); + Builder.CreateStore(NextAddr, VAListAddrAsBPP); + + if (isIndirect) + Addr = Builder.CreateLoad(Builder.CreateBitCast(Addr, BPP)); + llvm::Type *PTy = llvm::PointerType::getUnqual(CGF.ConvertType(Ty)); + llvm::Value *AddrTyped = Builder.CreateBitCast(Addr, PTy); + + return AddrTyped; +} + +//===----------------------------------------------------------------------===// // ARM ABI Implementation //===----------------------------------------------------------------------===// @@ -3064,35 +4013,62 @@ public: private: ABIKind Kind; + mutable int VFPRegs[16]; + const unsigned NumVFPs; + const unsigned NumGPRs; + mutable unsigned AllocatedGPRs; + mutable unsigned AllocatedVFPs; public: - ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind) { + ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind), + NumVFPs(16), NumGPRs(4) { setRuntimeCC(); + resetAllocatedRegs(); } bool isEABI() const { - StringRef Env = getTarget().getTriple().getEnvironmentName(); - return (Env == "gnueabi" || Env == "eabi" || - Env == "android" || Env == "androideabi"); + switch (getTarget().getTriple().getEnvironment()) { + case llvm::Triple::Android: + case llvm::Triple::EABI: + case llvm::Triple::EABIHF: + case llvm::Triple::GNUEABI: + case llvm::Triple::GNUEABIHF: + return true; + default: + return false; + } + } + + bool isEABIHF() const { + switch (getTarget().getTriple().getEnvironment()) { + case llvm::Triple::EABIHF: + case llvm::Triple::GNUEABIHF: + return true; + default: + return false; + } } ABIKind getABIKind() const { return Kind; } private: - ABIArgInfo classifyReturnType(QualType RetTy) const; - ABIArgInfo classifyArgumentType(QualType RetTy, int *VFPRegs, - unsigned &AllocatedVFP, - bool &IsHA) const; + ABIArgInfo classifyReturnType(QualType RetTy, bool 
isVariadic) const; + ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic, + bool &IsCPRC) const; bool isIllegalVectorType(QualType Ty) const; - virtual void computeInfo(CGFunctionInfo &FI) const; + void computeInfo(CGFunctionInfo &FI) const override; - virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CGF) const; + llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const override; llvm::CallingConv::ID getLLVMDefaultCC() const; llvm::CallingConv::ID getABIDefaultCC() const; void setRuntimeCC(); + + void markAllocatedGPRs(unsigned Alignment, unsigned NumRequired) const; + void markAllocatedVFPs(unsigned Alignment, unsigned NumRequired) const; + void resetAllocatedRegs(void) const; }; class ARMTargetCodeGenInfo : public TargetCodeGenInfo { @@ -3104,16 +4080,16 @@ public: return static_cast<const ARMABIInfo&>(TargetCodeGenInfo::getABIInfo()); } - int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const { + int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 13; } - StringRef getARCRetainAutoreleasedReturnValueMarker() const { + StringRef getARCRetainAutoreleasedReturnValueMarker() const override { return "mov\tr7, r7\t\t@ marker for objc_retainAutoreleaseReturnValue"; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const { + llvm::Value *Address) const override { llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4); // 0-15 are the 16 integer registers. @@ -3121,13 +4097,13 @@ public: return false; } - unsigned getSizeOfUnwindException() const { + unsigned getSizeOfUnwindException() const override { if (getABIInfo().isEABI()) return 88; return TargetCodeGenInfo::getSizeOfUnwindException(); } void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const { + CodeGen::CodeGenModule &CGM) const override { const FunctionDecl *FD = dyn_cast<FunctionDecl>(D); if (!FD) return; @@ -3176,24 +4152,43 @@ void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { // allocated to the lowest-numbered sequence of such registers. // C.2.vfp If the argument is a VFP CPRC then any VFP registers that are // unallocated are marked as unavailable. - unsigned AllocatedVFP = 0; - int VFPRegs[16] = { 0 }; - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); - it != ie; ++it) { - unsigned PreAllocation = AllocatedVFP; - bool IsHA = false; + resetAllocatedRegs(); + + if (getCXXABI().classifyReturnType(FI)) { + if (FI.getReturnInfo().isIndirect()) + markAllocatedGPRs(1, 1); + } else { + FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic()); + } + for (auto &I : FI.arguments()) { + unsigned PreAllocationVFPs = AllocatedVFPs; + unsigned PreAllocationGPRs = AllocatedGPRs; + bool IsCPRC = false; // 6.1.2.3 There is one VFP co-processor register class using registers // s0-s15 (d0-d7) for passing arguments. - const unsigned NumVFPs = 16; - it->info = classifyArgumentType(it->type, VFPRegs, AllocatedVFP, IsHA); - // If we do not have enough VFP registers for the HA, any VFP registers - // that are unallocated are marked as unavailable. To achieve this, we add - // padding of (NumVFPs - PreAllocation) floats. 
- if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) { + I.info = classifyArgumentType(I.type, FI.isVariadic(), IsCPRC); + + // If we have allocated some arguments onto the stack (due to running + // out of VFP registers), we cannot split an argument between GPRs and + // the stack. If this situation occurs, we add padding to prevent the + // GPRs from being used. In this situation, the current argument could + // only be allocated by rule C.8, so rule C.6 would mark these GPRs as + // unusable anyway. + // We do not have to do this if the argument is being passed ByVal, as the + // backend can handle that situation correctly. + const bool StackUsed = PreAllocationGPRs > NumGPRs || PreAllocationVFPs > NumVFPs; + const bool IsByVal = I.info.isIndirect() && I.info.getIndirectByVal(); + if (!IsCPRC && PreAllocationGPRs < NumGPRs && AllocatedGPRs > NumGPRs && + StackUsed && !IsByVal) { llvm::Type *PaddingTy = llvm::ArrayType::get( - llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation); - it->info = ABIArgInfo::getExpandWithPadding(false, PaddingTy); + llvm::Type::getInt32Ty(getVMContext()), NumGPRs - PreAllocationGPRs); + if (I.info.canHaveCoerceToType()) { + I.info = ABIArgInfo::getDirect(I.info.getCoerceToType() /* type */, 0 /* offset */, + PaddingTy); + } else { + I.info = ABIArgInfo::getDirect(nullptr /* type */, 0 /* offset */, + PaddingTy); + } } } @@ -3209,7 +4204,7 @@ void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { /// Return the default calling convention that LLVM will use. llvm::CallingConv::ID ARMABIInfo::getLLVMDefaultCC() const { // The default calling convention that LLVM will infer. - if (getTarget().getTriple().getEnvironmentName()=="gnueabihf") + if (isEABIHF()) return llvm::CallingConv::ARM_AAPCS_VFP; else if (isEABI()) return llvm::CallingConv::ARM_AAPCS; @@ -3243,8 +4238,7 @@ void ARMABIInfo::setRuntimeCC() { /// contained in the type is returned through it; this is used for the /// recursive calls that check aggregate component types. static bool isHomogeneousAggregate(QualType Ty, const Type *&Base, - ASTContext &Context, - uint64_t *HAMembers = 0) { + ASTContext &Context, uint64_t *HAMembers) { uint64_t Members = 0; if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) { if (!isHomogeneousAggregate(AT->getElementType(), Base, Context, &Members)) @@ -3256,9 +4250,7 @@ static bool isHomogeneousAggregate(QualType Ty, const Type *&Base, return false; Members = 0; - for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); - i != e; ++i) { - const FieldDecl *FD = *i; + for (const auto *FD : RD->fields()) { uint64_t FldMembers; if (!isHomogeneousAggregate(FD->getType(), Base, Context, &FldMembers)) return false; @@ -3293,10 +4285,29 @@ static bool isHomogeneousAggregate(QualType Ty, const Type *&Base, const Type *TyPtr = Ty.getTypePtr(); if (!Base) Base = TyPtr; - if (Base != TyPtr && - (!Base->isVectorType() || !TyPtr->isVectorType() || - Context.getTypeSize(Base) != Context.getTypeSize(TyPtr))) - return false; + + if (Base != TyPtr) { + // Homogeneous aggregates are defined as containing members with the + // same machine type. There are two cases in which two members have + // different TypePtrs but the same machine type: + + // 1) Vectors of the same length, regardless of the type and number + // of their members. 
+ const bool SameLengthVectors = Base->isVectorType() && TyPtr->isVectorType() + && (Context.getTypeSize(Base) == Context.getTypeSize(TyPtr)); + + // 2) In the 32-bit AAPCS, `double' and `long double' have the same + // machine type. This is not the case for the 64-bit AAPCS. + const bool SameSizeDoubles = + ( ( Base->isSpecificBuiltinType(BuiltinType::Double) + && TyPtr->isSpecificBuiltinType(BuiltinType::LongDouble)) + || ( Base->isSpecificBuiltinType(BuiltinType::LongDouble) + && TyPtr->isSpecificBuiltinType(BuiltinType::Double))) + && (Context.getTypeSize(Base) == Context.getTypeSize(TyPtr)); + + if (!SameLengthVectors && !SameSizeDoubles) + return false; + } } // Homogeneous Aggregates can have at most 4 members of the base type. @@ -3308,12 +4319,15 @@ static bool isHomogeneousAggregate(QualType Ty, const Type *&Base, /// markAllocatedVFPs - update VFPRegs according to the alignment and /// number of VFP registers (unit is S register) requested. -static void markAllocatedVFPs(int *VFPRegs, unsigned &AllocatedVFP, - unsigned Alignment, - unsigned NumRequired) { +void ARMABIInfo::markAllocatedVFPs(unsigned Alignment, + unsigned NumRequired) const { // Early Exit. - if (AllocatedVFP >= 16) + if (AllocatedVFPs >= 16) { + // We use AllocatedVFP > 16 to signal that some CPRCs were allocated on + // the stack. + AllocatedVFPs = 17; return; + } // C.1.vfp If the argument is a VFP CPRC and there are sufficient consecutive // VFP registers of the appropriate type unallocated then the argument is // allocated to the lowest-numbered sequence of such registers. @@ -3327,7 +4341,7 @@ static void markAllocatedVFPs(int *VFPRegs, unsigned &AllocatedVFP, if (FoundSlot) { for (unsigned J = I, JEnd = I + NumRequired; J < JEnd; J++) VFPRegs[J] = 1; - AllocatedVFP += NumRequired; + AllocatedVFPs += NumRequired; return; } } @@ -3335,12 +4349,31 @@ static void markAllocatedVFPs(int *VFPRegs, unsigned &AllocatedVFP, // unallocated are marked as unavailable. for (unsigned I = 0; I < 16; I++) VFPRegs[I] = 1; - AllocatedVFP = 17; // We do not have enough VFP registers. + AllocatedVFPs = 17; // We do not have enough VFP registers. } -ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, int *VFPRegs, - unsigned &AllocatedVFP, - bool &IsHA) const { +/// Update AllocatedGPRs to record the number of general purpose registers +/// which have been allocated. It is valid for AllocatedGPRs to go above 4, +/// this represents arguments being stored on the stack. 
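/// Worked example (not from the patch): with one GPR already in use
/// (AllocatedGPRs == 1), an 8-byte aligned argument needing two registers is
/// recorded as markAllocatedGPRs(2, 2); the parity bump below first skips r1
/// (1 -> 2) and the argument then occupies r2/r3 (2 -> 4).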
+void ARMABIInfo::markAllocatedGPRs(unsigned Alignment, + unsigned NumRequired) const { + assert((Alignment == 1 || Alignment == 2) && "Alignment must be 4 or 8 bytes"); + + if (Alignment == 2 && AllocatedGPRs & 0x1) + AllocatedGPRs += 1; + + AllocatedGPRs += NumRequired; +} + +void ARMABIInfo::resetAllocatedRegs(void) const { + AllocatedGPRs = 0; + AllocatedVFPs = 0; + for (unsigned i = 0; i < NumVFPs; ++i) + VFPRegs[i] = 0; +} + +ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic, + bool &IsCPRC) const { // We update number of allocated VFPs according to // 6.1.2.1 The following argument types are VFP CPRCs: // A single-precision floating-point type (including promoted @@ -3356,58 +4389,85 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, int *VFPRegs, if (Size <= 32) { llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext()); + markAllocatedGPRs(1, 1); return ABIArgInfo::getDirect(ResType); } if (Size == 64) { llvm::Type *ResType = llvm::VectorType::get( llvm::Type::getInt32Ty(getVMContext()), 2); - markAllocatedVFPs(VFPRegs, AllocatedVFP, 2, 2); + if (getABIKind() == ARMABIInfo::AAPCS || isVariadic){ + markAllocatedGPRs(2, 2); + } else { + markAllocatedVFPs(2, 2); + IsCPRC = true; + } return ABIArgInfo::getDirect(ResType); } if (Size == 128) { llvm::Type *ResType = llvm::VectorType::get( llvm::Type::getInt32Ty(getVMContext()), 4); - markAllocatedVFPs(VFPRegs, AllocatedVFP, 4, 4); + if (getABIKind() == ARMABIInfo::AAPCS || isVariadic) { + markAllocatedGPRs(2, 4); + } else { + markAllocatedVFPs(4, 4); + IsCPRC = true; + } return ABIArgInfo::getDirect(ResType); } + markAllocatedGPRs(1, 1); return ABIArgInfo::getIndirect(0, /*ByVal=*/false); } // Update VFPRegs for legal vector types. - if (const VectorType *VT = Ty->getAs<VectorType>()) { - uint64_t Size = getContext().getTypeSize(VT); - // Size of a legal vector should be power of 2 and above 64. - markAllocatedVFPs(VFPRegs, AllocatedVFP, Size >= 128 ? 4 : 2, Size / 32); + if (getABIKind() == ARMABIInfo::AAPCS_VFP && !isVariadic) { + if (const VectorType *VT = Ty->getAs<VectorType>()) { + uint64_t Size = getContext().getTypeSize(VT); + // Size of a legal vector should be power of 2 and above 64. + markAllocatedVFPs(Size >= 128 ? 4 : 2, Size / 32); + IsCPRC = true; + } } // Update VFPRegs for floating point types. - if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { - if (BT->getKind() == BuiltinType::Half || - BT->getKind() == BuiltinType::Float) - markAllocatedVFPs(VFPRegs, AllocatedVFP, 1, 1); - if (BT->getKind() == BuiltinType::Double || - BT->getKind() == BuiltinType::LongDouble) - markAllocatedVFPs(VFPRegs, AllocatedVFP, 2, 2); + if (getABIKind() == ARMABIInfo::AAPCS_VFP && !isVariadic) { + if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { + if (BT->getKind() == BuiltinType::Half || + BT->getKind() == BuiltinType::Float) { + markAllocatedVFPs(1, 1); + IsCPRC = true; + } + if (BT->getKind() == BuiltinType::Double || + BT->getKind() == BuiltinType::LongDouble) { + markAllocatedVFPs(2, 2); + IsCPRC = true; + } + } } if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) { Ty = EnumTy->getDecl()->getIntegerType(); + } + unsigned Size = getContext().getTypeSize(Ty); + if (!IsCPRC) + markAllocatedGPRs(Size > 32 ? 2 : 1, (Size + 31) / 32); return (Ty->isPromotableIntegerType() ? 
ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { + markAllocatedGPRs(1, 1); return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory); + } // Ignore empty records. if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); - if (getABIKind() == ARMABIInfo::AAPCS_VFP) { + if (getABIKind() == ARMABIInfo::AAPCS_VFP && !isVariadic) { // Homogeneous Aggregates need to be expanded when we can fit the aggregate // into VFP registers. - const Type *Base = 0; + const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) { assert(Base && "Base class should be set for homogeneous aggregate"); @@ -3415,17 +4475,17 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, int *VFPRegs, if (Base->isVectorType()) { // ElementSize is in number of floats. unsigned ElementSize = getContext().getTypeSize(Base) == 64 ? 2 : 4; - markAllocatedVFPs(VFPRegs, AllocatedVFP, ElementSize, + markAllocatedVFPs(ElementSize, Members * ElementSize); } else if (Base->isSpecificBuiltinType(BuiltinType::Float)) - markAllocatedVFPs(VFPRegs, AllocatedVFP, 1, Members); + markAllocatedVFPs(1, Members); else { assert(Base->isSpecificBuiltinType(BuiltinType::Double) || Base->isSpecificBuiltinType(BuiltinType::LongDouble)); - markAllocatedVFPs(VFPRegs, AllocatedVFP, 2, Members * 2); + markAllocatedVFPs(2, Members * 2); } - IsHA = true; - return ABIArgInfo::getExpand(); + IsCPRC = true; + return ABIArgInfo::getDirect(); } } @@ -3439,7 +4499,12 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, int *VFPRegs, getABIKind() == ARMABIInfo::AAPCS) ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8); if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) { - return ABIArgInfo::getIndirect(0, /*ByVal=*/true, + // Update Allocated GPRs. Since this is only used when the size of the + // argument is greater than 64 bytes, this will always use up any available + // registers (of which there are 4). We also don't care about getting the + // alignment right, because general-purpose registers cannot be back-filled. + markAllocatedGPRs(1, 4); + return ABIArgInfo::getIndirect(TyAlign, /*ByVal=*/true, /*Realign=*/TyAlign > ABIAlign); } @@ -3451,9 +4516,11 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, int *VFPRegs, if (getContext().getTypeAlign(Ty) <= 32) { ElemTy = llvm::Type::getInt32Ty(getVMContext()); SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32; + markAllocatedGPRs(1, SizeRegs); } else { ElemTy = llvm::Type::getInt64Ty(getVMContext()); SizeRegs = (getContext().getTypeSize(Ty) + 63) / 64; + markAllocatedGPRs(2, SizeRegs * 2); } llvm::Type *STy = @@ -3546,13 +4613,16 @@ static bool isIntegerLikeType(QualType Ty, ASTContext &Context, return true; } -ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy) const { +ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, + bool isVariadic) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); // Large vector types should be returned via memory. - if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) + if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) { + markAllocatedGPRs(1, 1); return ABIArgInfo::getIndirect(0); + } if (!isAggregateTypeForABI(RetTy)) { // Treat an enum type as its underlying type. 
@@ -3563,11 +4633,6 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy) const { ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } - // Structures with either a non-trivial destructor or a non-trivial - // copy constructor are always indirect. - if (isRecordReturnIndirect(RetTy, getCXXABI())) - return ABIArgInfo::getIndirect(0, /*ByVal=*/false); - // Are we following APCS? if (getABIKind() == APCS) { if (isEmptyRecord(getContext(), RetTy, false)) @@ -3593,6 +4658,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy) const { } // Otherwise return in memory. + markAllocatedGPRs(1, 1); return ABIArgInfo::getIndirect(0); } @@ -3602,8 +4668,8 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy) const { return ABIArgInfo::getIgnore(); // Check for homogeneous aggregates with AAPCS-VFP. - if (getABIKind() == AAPCS_VFP) { - const Type *Base = 0; + if (getABIKind() == AAPCS_VFP && !isVariadic) { + const Type *Base = nullptr; if (isHomogeneousAggregate(RetTy, Base, getContext())) { assert(Base && "Base class should be set for homogeneous aggregate"); // Homogeneous Aggregates are returned directly. @@ -3615,6 +4681,10 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy) const { // are returned indirectly. uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 32) { + if (getDataLayout().isBigEndian()) + // Return in 32 bit integer integer type (as if loaded by LDR, AAPCS 5.4) + return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); + // Return in the smallest viable integer type. if (Size <= 8) return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); @@ -3623,6 +4693,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy) const { return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); } + markAllocatedGPRs(1, 1); return ABIArgInfo::getIndirect(0); } @@ -3723,9 +4794,9 @@ class NaClARMABIInfo : public ABIInfo { public: NaClARMABIInfo(CodeGen::CodeGenTypes &CGT, ARMABIInfo::ABIKind Kind) : ABIInfo(CGT), PInfo(CGT), NInfo(CGT, Kind) {} - virtual void computeInfo(CGFunctionInfo &FI) const; - virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CGF) const; + void computeInfo(CGFunctionInfo &FI) const override; + llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const override; private: PNaClABIInfo PInfo; // Used for generating calls with pnaclcall callingconv. ARMABIInfo NInfo; // Used for everything else. @@ -3754,418 +4825,6 @@ llvm::Value *NaClARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, } //===----------------------------------------------------------------------===// -// AArch64 ABI Implementation -//===----------------------------------------------------------------------===// - -namespace { - -class AArch64ABIInfo : public ABIInfo { -public: - AArch64ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} - -private: - // The AArch64 PCS is explicit about return types and argument types being - // handled identically, so we don't need to draw a distinction between - // Argument and Return classification. 
- ABIArgInfo classifyGenericType(QualType Ty, int &FreeIntRegs, - int &FreeVFPRegs) const; - - ABIArgInfo tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded, bool IsInt, - llvm::Type *DirectTy = 0) const; - - virtual void computeInfo(CGFunctionInfo &FI) const; - - virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CGF) const; -}; - -class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { -public: - AArch64TargetCodeGenInfo(CodeGenTypes &CGT) - :TargetCodeGenInfo(new AArch64ABIInfo(CGT)) {} - - const AArch64ABIInfo &getABIInfo() const { - return static_cast<const AArch64ABIInfo&>(TargetCodeGenInfo::getABIInfo()); - } - - int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const { - return 31; - } - - bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const { - // 0-31 are x0-x30 and sp: 8 bytes each - llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8); - AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 31); - - // 64-95 are v0-v31: 16 bytes each - llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16); - AssignToArrayRange(CGF.Builder, Address, Sixteen8, 64, 95); - - return false; - } - -}; - -} - -void AArch64ABIInfo::computeInfo(CGFunctionInfo &FI) const { - int FreeIntRegs = 8, FreeVFPRegs = 8; - - FI.getReturnInfo() = classifyGenericType(FI.getReturnType(), - FreeIntRegs, FreeVFPRegs); - - FreeIntRegs = FreeVFPRegs = 8; - for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); - it != ie; ++it) { - it->info = classifyGenericType(it->type, FreeIntRegs, FreeVFPRegs); - - } -} - -ABIArgInfo -AArch64ABIInfo::tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded, - bool IsInt, llvm::Type *DirectTy) const { - if (FreeRegs >= RegsNeeded) { - FreeRegs -= RegsNeeded; - return ABIArgInfo::getDirect(DirectTy); - } - - llvm::Type *Padding = 0; - - // We need padding so that later arguments don't get filled in anyway. That - // wouldn't happen if only ByVal arguments followed in the same category, but - // a large structure will simply seem to be a pointer as far as LLVM is - // concerned. - if (FreeRegs > 0) { - if (IsInt) - Padding = llvm::Type::getInt64Ty(getVMContext()); - else - Padding = llvm::Type::getFloatTy(getVMContext()); - - // Either [N x i64] or [N x float]. - Padding = llvm::ArrayType::get(Padding, FreeRegs); - FreeRegs = 0; - } - - return ABIArgInfo::getIndirect(getContext().getTypeAlign(Ty) / 8, - /*IsByVal=*/ true, /*Realign=*/ false, - Padding); -} - - -ABIArgInfo AArch64ABIInfo::classifyGenericType(QualType Ty, - int &FreeIntRegs, - int &FreeVFPRegs) const { - // Can only occurs for return, but harmless otherwise. - if (Ty->isVoidType()) - return ABIArgInfo::getIgnore(); - - // Large vector types should be returned via memory. There's no such concept - // in the ABI, but they'd be over 16 bytes anyway so no matter how they're - // classified they'd go into memory (see B.3). - if (Ty->isVectorType() && getContext().getTypeSize(Ty) > 128) { - if (FreeIntRegs > 0) - --FreeIntRegs; - return ABIArgInfo::getIndirect(0, /*ByVal=*/false); - } - - // All non-aggregate LLVM types have a concrete ABI representation so they can - // be passed directly. After this block we're guaranteed to be in a - // complicated case. - if (!isAggregateTypeForABI(Ty)) { - // Treat an enum type as its underlying type. 
- if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - if (Ty->isFloatingType() || Ty->isVectorType()) - return tryUseRegs(Ty, FreeVFPRegs, /*RegsNeeded=*/ 1, /*IsInt=*/ false); - - assert(getContext().getTypeSize(Ty) <= 128 && - "unexpectedly large scalar type"); - - int RegsNeeded = getContext().getTypeSize(Ty) > 64 ? 2 : 1; - - // If the type may need padding registers to ensure "alignment", we must be - // careful when this is accounted for. Increasing the effective size covers - // all cases. - if (getContext().getTypeAlign(Ty) == 128) - RegsNeeded += FreeIntRegs % 2 != 0; - - return tryUseRegs(Ty, FreeIntRegs, RegsNeeded, /*IsInt=*/ true); - } - - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { - if (FreeIntRegs > 0 && RAA == CGCXXABI::RAA_Indirect) - --FreeIntRegs; - return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory); - } - - if (isEmptyRecord(getContext(), Ty, true)) { - if (!getContext().getLangOpts().CPlusPlus) { - // Empty structs outside C++ mode are a GNU extension, so no ABI can - // possibly tell us what to do. It turns out (I believe) that GCC ignores - // the object for parameter-passsing purposes. - return ABIArgInfo::getIgnore(); - } - - // The combination of C++98 9p5 (sizeof(struct) != 0) and the pseudocode - // description of va_arg in the PCS require that an empty struct does - // actually occupy space for parameter-passing. I'm hoping for a - // clarification giving an explicit paragraph to point to in future. - return tryUseRegs(Ty, FreeIntRegs, /*RegsNeeded=*/ 1, /*IsInt=*/ true, - llvm::Type::getInt8Ty(getVMContext())); - } - - // Homogeneous vector aggregates get passed in registers or on the stack. - const Type *Base = 0; - uint64_t NumMembers = 0; - if (isHomogeneousAggregate(Ty, Base, getContext(), &NumMembers)) { - assert(Base && "Base class should be set for homogeneous aggregate"); - // Homogeneous aggregates are passed and returned directly. - return tryUseRegs(Ty, FreeVFPRegs, /*RegsNeeded=*/ NumMembers, - /*IsInt=*/ false); - } - - uint64_t Size = getContext().getTypeSize(Ty); - if (Size <= 128) { - // Small structs can use the same direct type whether they're in registers - // or on the stack. - llvm::Type *BaseTy; - unsigned NumBases; - int SizeInRegs = (Size + 63) / 64; - - if (getContext().getTypeAlign(Ty) == 128) { - BaseTy = llvm::Type::getIntNTy(getVMContext(), 128); - NumBases = 1; - - // If the type may need padding registers to ensure "alignment", we must - // be careful when this is accounted for. Increasing the effective size - // covers all cases. - SizeInRegs += FreeIntRegs % 2 != 0; - } else { - BaseTy = llvm::Type::getInt64Ty(getVMContext()); - NumBases = SizeInRegs; - } - llvm::Type *DirectTy = llvm::ArrayType::get(BaseTy, NumBases); - - return tryUseRegs(Ty, FreeIntRegs, /*RegsNeeded=*/ SizeInRegs, - /*IsInt=*/ true, DirectTy); - } - - // If the aggregate is > 16 bytes, it's passed and returned indirectly. In - // LLVM terms the return uses an "sret" pointer, but that's handled elsewhere. 
- --FreeIntRegs; - return ABIArgInfo::getIndirect(0, /* byVal = */ false); -} - -llvm::Value *AArch64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CGF) const { - // The AArch64 va_list type and handling is specified in the Procedure Call - // Standard, section B.4: - // - // struct { - // void *__stack; - // void *__gr_top; - // void *__vr_top; - // int __gr_offs; - // int __vr_offs; - // }; - - assert(!CGF.CGM.getDataLayout().isBigEndian() - && "va_arg not implemented for big-endian AArch64"); - - int FreeIntRegs = 8, FreeVFPRegs = 8; - Ty = CGF.getContext().getCanonicalType(Ty); - ABIArgInfo AI = classifyGenericType(Ty, FreeIntRegs, FreeVFPRegs); - - llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg"); - llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); - llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack"); - llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); - - llvm::Value *reg_offs_p = 0, *reg_offs = 0; - int reg_top_index; - int RegSize; - if (FreeIntRegs < 8) { - assert(FreeVFPRegs == 8 && "Arguments never split between int & VFP regs"); - // 3 is the field number of __gr_offs - reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p"); - reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs"); - reg_top_index = 1; // field number for __gr_top - RegSize = 8 * (8 - FreeIntRegs); - } else { - assert(FreeVFPRegs < 8 && "Argument must go in VFP or int regs"); - // 4 is the field number of __vr_offs. - reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p"); - reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs"); - reg_top_index = 2; // field number for __vr_top - RegSize = 16 * (8 - FreeVFPRegs); - } - - //======================================= - // Find out where argument was passed - //======================================= - - // If reg_offs >= 0 we're already using the stack for this type of - // argument. We don't want to keep updating reg_offs (in case it overflows, - // though anyone passing 2GB of arguments, each at most 16 bytes, deserves - // whatever they get). - llvm::Value *UsingStack = 0; - UsingStack = CGF.Builder.CreateICmpSGE(reg_offs, - llvm::ConstantInt::get(CGF.Int32Ty, 0)); - - CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock); - - // Otherwise, at least some kind of argument could go in these registers, the - // quesiton is whether this particular type is too big. - CGF.EmitBlock(MaybeRegBlock); - - // Integer arguments may need to correct register alignment (for example a - // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we - // align __gr_offs to calculate the potential address. - if (FreeIntRegs < 8 && AI.isDirect() && getContext().getTypeAlign(Ty) > 64) { - int Align = getContext().getTypeAlign(Ty) / 8; - - reg_offs = CGF.Builder.CreateAdd(reg_offs, - llvm::ConstantInt::get(CGF.Int32Ty, Align - 1), - "align_regoffs"); - reg_offs = CGF.Builder.CreateAnd(reg_offs, - llvm::ConstantInt::get(CGF.Int32Ty, -Align), - "aligned_regoffs"); - } - - // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list. - llvm::Value *NewOffset = 0; - NewOffset = CGF.Builder.CreateAdd(reg_offs, - llvm::ConstantInt::get(CGF.Int32Ty, RegSize), - "new_reg_offs"); - CGF.Builder.CreateStore(NewOffset, reg_offs_p); - - // Now we're in a position to decide whether this argument really was in - // registers or not. 
- llvm::Value *InRegs = 0; - InRegs = CGF.Builder.CreateICmpSLE(NewOffset, - llvm::ConstantInt::get(CGF.Int32Ty, 0), - "inreg"); - - CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock); - - //======================================= - // Argument was in registers - //======================================= - - // Now we emit the code for if the argument was originally passed in - // registers. First start the appropriate block: - CGF.EmitBlock(InRegBlock); - - llvm::Value *reg_top_p = 0, *reg_top = 0; - reg_top_p = CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p"); - reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top"); - llvm::Value *BaseAddr = CGF.Builder.CreateGEP(reg_top, reg_offs); - llvm::Value *RegAddr = 0; - llvm::Type *MemTy = llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty)); - - if (!AI.isDirect()) { - // If it's been passed indirectly (actually a struct), whatever we find from - // stored registers or on the stack will actually be a struct **. - MemTy = llvm::PointerType::getUnqual(MemTy); - } - - const Type *Base = 0; - uint64_t NumMembers; - if (isHomogeneousAggregate(Ty, Base, getContext(), &NumMembers) - && NumMembers > 1) { - // Homogeneous aggregates passed in registers will have their elements split - // and stored 16-bytes apart regardless of size (they're notionally in qN, - // qN+1, ...). We reload and store into a temporary local variable - // contiguously. - assert(AI.isDirect() && "Homogeneous aggregates should be passed directly"); - llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0)); - llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers); - llvm::Value *Tmp = CGF.CreateTempAlloca(HFATy); - - for (unsigned i = 0; i < NumMembers; ++i) { - llvm::Value *BaseOffset = llvm::ConstantInt::get(CGF.Int32Ty, 16 * i); - llvm::Value *LoadAddr = CGF.Builder.CreateGEP(BaseAddr, BaseOffset); - LoadAddr = CGF.Builder.CreateBitCast(LoadAddr, - llvm::PointerType::getUnqual(BaseTy)); - llvm::Value *StoreAddr = CGF.Builder.CreateStructGEP(Tmp, i); - - llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr); - CGF.Builder.CreateStore(Elem, StoreAddr); - } - - RegAddr = CGF.Builder.CreateBitCast(Tmp, MemTy); - } else { - // Otherwise the object is contiguous in memory - RegAddr = CGF.Builder.CreateBitCast(BaseAddr, MemTy); - } - - CGF.EmitBranch(ContBlock); - - //======================================= - // Argument was on the stack - //======================================= - CGF.EmitBlock(OnStackBlock); - - llvm::Value *stack_p = 0, *OnStackAddr = 0; - stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p"); - OnStackAddr = CGF.Builder.CreateLoad(stack_p, "stack"); - - // Again, stack arguments may need realigmnent. In this case both integer and - // floating-point ones might be affected. 
- if (AI.isDirect() && getContext().getTypeAlign(Ty) > 64) { - int Align = getContext().getTypeAlign(Ty) / 8; - - OnStackAddr = CGF.Builder.CreatePtrToInt(OnStackAddr, CGF.Int64Ty); - - OnStackAddr = CGF.Builder.CreateAdd(OnStackAddr, - llvm::ConstantInt::get(CGF.Int64Ty, Align - 1), - "align_stack"); - OnStackAddr = CGF.Builder.CreateAnd(OnStackAddr, - llvm::ConstantInt::get(CGF.Int64Ty, -Align), - "align_stack"); - - OnStackAddr = CGF.Builder.CreateIntToPtr(OnStackAddr, CGF.Int8PtrTy); - } - - uint64_t StackSize; - if (AI.isDirect()) - StackSize = getContext().getTypeSize(Ty) / 8; - else - StackSize = 8; - - // All stack slots are 8 bytes - StackSize = llvm::RoundUpToAlignment(StackSize, 8); - - llvm::Value *StackSizeC = llvm::ConstantInt::get(CGF.Int32Ty, StackSize); - llvm::Value *NewStack = CGF.Builder.CreateGEP(OnStackAddr, StackSizeC, - "new_stack"); - - // Write the new value of __stack for the next call to va_arg - CGF.Builder.CreateStore(NewStack, stack_p); - - OnStackAddr = CGF.Builder.CreateBitCast(OnStackAddr, MemTy); - - CGF.EmitBranch(ContBlock); - - //======================================= - // Tidy up - //======================================= - CGF.EmitBlock(ContBlock); - - llvm::PHINode *ResAddr = CGF.Builder.CreatePHI(MemTy, 2, "vaarg.addr"); - ResAddr->addIncoming(RegAddr, InRegBlock); - ResAddr->addIncoming(OnStackAddr, OnStackBlock); - - if (AI.isDirect()) - return ResAddr; - - return CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"); -} - -//===----------------------------------------------------------------------===// // NVPTX ABI Implementation //===----------------------------------------------------------------------===// @@ -4178,20 +4837,22 @@ public: ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType Ty) const; - virtual void computeInfo(CGFunctionInfo &FI) const; - virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CFG) const; + void computeInfo(CGFunctionInfo &FI) const override; + llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CFG) const override; }; class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo { public: NVPTXTargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new NVPTXABIInfo(CGT)) {} - - virtual void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const; + + void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &M) const override; private: - static void addKernelMetadata(llvm::Function *F); + // Adds a NamedMDNode with F, Name, and Operand as operands, and adds the + // resulting MDNode to the nvvm.annotations MDNode. + static void addNVVMMetadata(llvm::Function *F, StringRef Name, int Operand); }; ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const { @@ -4220,10 +4881,10 @@ ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const { } void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const { - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); - it != ie; ++it) - it->info = classifyArgumentType(it->type); + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type); // Always honor user-specified calling convention. 
if (FI.getCallingConvention() != llvm::CallingConv::C) @@ -4251,7 +4912,8 @@ SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, // By default, all functions are device functions if (FD->hasAttr<OpenCLKernelAttr>()) { // OpenCL __kernel functions get kernel metadata - addKernelMetadata(F); + // Create !{<func-ref>, metadata !"kernel", i32 1} node + addNVVMMetadata(F, "kernel", 1); // And kernel functions are not subject to inlining F->addFnAttr(llvm::Attribute::NoInline); } @@ -4262,28 +4924,41 @@ SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, // CUDA __global__ functions get a kernel metadata entry. Since // __global__ functions cannot be called from the device, we do not // need to set the noinline attribute. - if (FD->getAttr<CUDAGlobalAttr>()) - addKernelMetadata(F); + if (FD->hasAttr<CUDAGlobalAttr>()) { + // Create !{<func-ref>, metadata !"kernel", i32 1} node + addNVVMMetadata(F, "kernel", 1); + } + if (FD->hasAttr<CUDALaunchBoundsAttr>()) { + // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node + addNVVMMetadata(F, "maxntidx", + FD->getAttr<CUDALaunchBoundsAttr>()->getMaxThreads()); + // min blocks is a default argument for CUDALaunchBoundsAttr, so getting a + // zero value from getMinBlocks either means it was not specified in + // __launch_bounds__ or the user specified a 0 value. In both cases, we + // don't have to add a PTX directive. + int MinCTASM = FD->getAttr<CUDALaunchBoundsAttr>()->getMinBlocks(); + if (MinCTASM > 0) { + // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node + addNVVMMetadata(F, "minctasm", MinCTASM); + } + } } } -void NVPTXTargetCodeGenInfo::addKernelMetadata(llvm::Function *F) { +void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::Function *F, StringRef Name, + int Operand) { llvm::Module *M = F->getParent(); llvm::LLVMContext &Ctx = M->getContext(); // Get "nvvm.annotations" metadata node llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); - // Create !{<func-ref>, metadata !"kernel", i32 1} node - llvm::SmallVector<llvm::Value *, 3> MDVals; - MDVals.push_back(F); - MDVals.push_back(llvm::MDString::get(Ctx, "kernel")); - MDVals.push_back(llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1)); - + llvm::Value *MDVals[] = { + F, llvm::MDString::get(Ctx, Name), + llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand)}; // Append metadata to nvvm.annotations MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); } - } //===----------------------------------------------------------------------===// @@ -4303,15 +4978,15 @@ public: ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType ArgTy) const; - virtual void computeInfo(CGFunctionInfo &FI) const { - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); - it != ie; ++it) - it->info = classifyArgumentType(it->type); + void computeInfo(CGFunctionInfo &FI) const override { + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type); } - virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CGF) const; + llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const override; }; class SystemZTargetCodeGenInfo : public TargetCodeGenInfo { @@ -4363,9 +5038,8 @@ bool SystemZABIInfo::isFPArgumentType(QualType Ty) const { // If this is a C++ record, check the 
bases first. if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) - for (CXXRecordDecl::base_class_const_iterator I = CXXRD->bases_begin(), - E = CXXRD->bases_end(); I != E; ++I) { - QualType Base = I->getType(); + for (const auto &I : CXXRD->bases()) { + QualType Base = I.getType(); // Empty bases don't affect things either way. if (isEmptyRecord(getContext(), Base, true)) @@ -4379,10 +5053,7 @@ bool SystemZABIInfo::isFPArgumentType(QualType Ty) const { } // Check the fields. - for (RecordDecl::field_iterator I = RD->field_begin(), - E = RD->field_end(); I != E; ++I) { - const FieldDecl *FD = *I; - + for (const auto *FD : RD->fields()) { // Empty bitfields don't affect things either way. // Unlike isSingleElementStruct(), empty structure and array fields // do count. So do anonymous bitfields that aren't zero-sized. @@ -4455,7 +5126,7 @@ llvm::Value *SystemZABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, llvm::Type *IndexTy = RegCount->getType(); llvm::Value *MaxRegsV = llvm::ConstantInt::get(IndexTy, MaxRegs); llvm::Value *InRegs = CGF.Builder.CreateICmpULT(RegCount, MaxRegsV, - "fits_in_regs"); + "fits_in_regs"); llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem"); @@ -4521,37 +5192,6 @@ llvm::Value *SystemZABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, return ResAddr; } -bool X86_32TargetCodeGenInfo::isStructReturnInRegABI( - const llvm::Triple &Triple, const CodeGenOptions &Opts) { - assert(Triple.getArch() == llvm::Triple::x86); - - switch (Opts.getStructReturnConvention()) { - case CodeGenOptions::SRCK_Default: - break; - case CodeGenOptions::SRCK_OnStack: // -fpcc-struct-return - return false; - case CodeGenOptions::SRCK_InRegs: // -freg-struct-return - return true; - } - - if (Triple.isOSDarwin()) - return true; - - switch (Triple.getOS()) { - case llvm::Triple::Cygwin: - case llvm::Triple::MinGW32: - case llvm::Triple::AuroraUX: - case llvm::Triple::DragonFly: - case llvm::Triple::FreeBSD: - case llvm::Triple::OpenBSD: - case llvm::Triple::Bitrig: - case llvm::Triple::Win32: - return true; - default: - return false; - } -} - ABIArgInfo SystemZABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); @@ -4600,7 +5240,7 @@ ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const { if (isCompoundType(Ty)) return ABIArgInfo::getIndirect(0, /*ByVal=*/false); - return ABIArgInfo::getDirect(0); + return ABIArgInfo::getDirect(nullptr); } //===----------------------------------------------------------------------===// @@ -4614,7 +5254,7 @@ public: MSP430TargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const; + CodeGen::CodeGenModule &M) const override; }; } @@ -4635,9 +5275,8 @@ void MSP430TargetCodeGenInfo::SetTargetAttributes(const Decl *D, // Step 3: Emit ISR vector alias. 
unsigned Num = attr->getNumber() / 2; - new llvm::GlobalAlias(GV->getType(), llvm::Function::ExternalLinkage, - "__isr_" + Twine(Num), - GV, &M.getModule()); + llvm::GlobalAlias::create(llvm::Function::ExternalLinkage, + "__isr_" + Twine(Num), F); } } } @@ -4663,9 +5302,9 @@ public: ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy, uint64_t &Offset) const; - virtual void computeInfo(CGFunctionInfo &FI) const; - virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CGF) const; + void computeInfo(CGFunctionInfo &FI) const override; + llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const override; }; class MIPSTargetCodeGenInfo : public TargetCodeGenInfo { @@ -4675,12 +5314,12 @@ public: : TargetCodeGenInfo(new MipsABIInfo(CGT, IsO32)), SizeOfUnwindException(IsO32 ? 24 : 32) {} - int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const { + int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { return 29; } void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const { + CodeGen::CodeGenModule &CGM) const override { const FunctionDecl *FD = dyn_cast<FunctionDecl>(D); if (!FD) return; llvm::Function *Fn = cast<llvm::Function>(GV); @@ -4693,9 +5332,9 @@ public: } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const; + llvm::Value *Address) const override; - unsigned getSizeOfUnwindException() const { + unsigned getSizeOfUnwindException() const override { return SizeOfUnwindException; } }; @@ -4778,7 +5417,7 @@ llvm::Type* MipsABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const { llvm::Type *MipsABIInfo::getPaddingType(uint64_t OrigOffset, uint64_t Offset) const { if (OrigOffset + MinABIStackAlignInBytes > Offset) - return 0; + return nullptr; return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8); } @@ -4819,7 +5458,7 @@ MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const { return ABIArgInfo::getExtend(); return ABIArgInfo::getDirect( - 0, 0, IsO32 ? 0 : getPaddingType(OrigOffset, CurrOffset)); + nullptr, 0, IsO32 ? nullptr : getPaddingType(OrigOffset, CurrOffset)); } llvm::Type* @@ -4871,9 +5510,6 @@ ABIArgInfo MipsABIInfo::classifyReturnType(QualType RetTy) const { return ABIArgInfo::getIgnore(); if (isAggregateTypeForABI(RetTy) || RetTy->isVectorType()) { - if (isRecordReturnIndirect(RetTy, getCXXABI())) - return ABIArgInfo::getIndirect(0); - if (Size <= 128) { if (RetTy->isAnyComplexType()) return ABIArgInfo::getDirect(); @@ -4899,14 +5535,14 @@ ABIArgInfo MipsABIInfo::classifyReturnType(QualType RetTy) const { void MipsABIInfo::computeInfo(CGFunctionInfo &FI) const { ABIArgInfo &RetInfo = FI.getReturnInfo(); - RetInfo = classifyReturnType(FI.getReturnType()); + if (!getCXXABI().classifyReturnType(FI)) + RetInfo = classifyReturnType(FI.getReturnType()); // Check if a pointer to an aggregate is passed as a hidden argument. uint64_t Offset = RetInfo.isIndirect() ? 
MinABIStackAlignInBytes : 0; - for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); - it != ie; ++it) - it->info = classifyArgumentType(it->type, Offset); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type, Offset); } llvm::Value* MipsABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, @@ -4987,8 +5623,8 @@ public: TCETargetCodeGenInfo(CodeGenTypes &CGT) : DefaultTargetCodeGenInfo(CGT) {} - virtual void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const; + void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &M) const override; }; void TCETargetCodeGenInfo::SetTargetAttributes(const Decl *D, @@ -5003,9 +5639,8 @@ void TCETargetCodeGenInfo::SetTargetAttributes(const Decl *D, if (FD->hasAttr<OpenCLKernelAttr>()) { // OpenCL C Kernel functions are not subject to inlining F->addFnAttr(llvm::Attribute::NoInline); - - if (FD->hasAttr<ReqdWorkGroupSizeAttr>()) { - + const ReqdWorkGroupSizeAttr *Attr = FD->getAttr<ReqdWorkGroupSizeAttr>(); + if (Attr) { // Convert the reqd_work_group_size() attributes to metadata. llvm::LLVMContext &Context = F->getContext(); llvm::NamedMDNode *OpenCLMetadata = @@ -5015,14 +5650,11 @@ void TCETargetCodeGenInfo::SetTargetAttributes(const Decl *D, Operands.push_back(F); Operands.push_back(llvm::Constant::getIntegerValue(M.Int32Ty, - llvm::APInt(32, - FD->getAttr<ReqdWorkGroupSizeAttr>()->getXDim()))); + llvm::APInt(32, Attr->getXDim()))); Operands.push_back(llvm::Constant::getIntegerValue(M.Int32Ty, - llvm::APInt(32, - FD->getAttr<ReqdWorkGroupSizeAttr>()->getYDim()))); + llvm::APInt(32, Attr->getYDim()))); Operands.push_back(llvm::Constant::getIntegerValue(M.Int32Ty, - llvm::APInt(32, - FD->getAttr<ReqdWorkGroupSizeAttr>()->getZDim()))); + llvm::APInt(32, Attr->getZDim()))); // Add a boolean constant operand for "required" (true) or "hint" (false) // for implementing the work_group_size_hint attr later. 
Currently @@ -5053,10 +5685,10 @@ private: ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy) const; - virtual void computeInfo(CGFunctionInfo &FI) const; + void computeInfo(CGFunctionInfo &FI) const override; - virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CGF) const; + llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const override; }; class HexagonTargetCodeGenInfo : public TargetCodeGenInfo { @@ -5064,7 +5696,7 @@ public: HexagonTargetCodeGenInfo(CodeGenTypes &CGT) :TargetCodeGenInfo(new HexagonABIInfo(CGT)) {} - int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const { + int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 29; } }; @@ -5072,10 +5704,10 @@ public: } void HexagonABIInfo::computeInfo(CGFunctionInfo &FI) const { - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); - it != ie; ++it) - it->info = classifyArgumentType(it->type); + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type); } ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty) const { @@ -5126,11 +5758,6 @@ ABIArgInfo HexagonABIInfo::classifyReturnType(QualType RetTy) const { ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } - // Structures with either a non-trivial destructor or a non-trivial - // copy constructor are always indirect. - if (isRecordReturnIndirect(RetTy, getCXXABI())) - return ABIArgInfo::getIndirect(0, /*ByVal=*/false); - if (isEmptyRecord(getContext(), RetTy, true)) return ABIArgInfo::getIgnore(); @@ -5208,9 +5835,9 @@ public: private: ABIArgInfo classifyType(QualType RetTy, unsigned SizeLimit) const; - virtual void computeInfo(CGFunctionInfo &FI) const; - virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CGF) const; + void computeInfo(CGFunctionInfo &FI) const override; + llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const override; // Coercion type builder for structs passed in registers. The coercion type // serves two purposes: @@ -5349,6 +5976,11 @@ SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const { if (!isAggregateTypeForABI(Ty)) return ABIArgInfo::getDirect(); + // If a C++ object has either a non-trivial copy constructor or a non-trivial + // destructor, it is passed with an explicit indirect pointer / sret pointer. + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) + return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory); + // This is a small aggregate type that should be passed in registers. // Build a coercion type from the LLVM struct type. 
llvm::StructType *StrTy = dyn_cast<llvm::StructType>(CGT.ConvertType(Ty)); @@ -5385,6 +6017,7 @@ llvm::Value *SparcV9ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, switch (AI.getKind()) { case ABIArgInfo::Expand: + case ABIArgInfo::InAlloca: llvm_unreachable("Unsupported ABI kind for va_arg"); case ABIArgInfo::Extend: @@ -5420,9 +6053,8 @@ llvm::Value *SparcV9ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, void SparcV9ABIInfo::computeInfo(CGFunctionInfo &FI) const { FI.getReturnInfo() = classifyType(FI.getReturnType(), 32 * 8); - for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); - it != ie; ++it) - it->info = classifyType(it->type, 16 * 8); + for (auto &I : FI.arguments()) + I.info = classifyType(I.type, 16 * 8); } namespace { @@ -5430,26 +6062,165 @@ class SparcV9TargetCodeGenInfo : public TargetCodeGenInfo { public: SparcV9TargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new SparcV9ABIInfo(CGT)) {} + + int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { + return 14; + } + + bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const override; }; } // end anonymous namespace +bool +SparcV9TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const { + // This is calculated from the LLVM and GCC tables and verified + // against gcc output. AFAIK all ABIs use the same encoding. + + CodeGen::CGBuilderTy &Builder = CGF.Builder; + + llvm::IntegerType *i8 = CGF.Int8Ty; + llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4); + llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8); + + // 0-31: the 8-byte general-purpose registers + AssignToArrayRange(Builder, Address, Eight8, 0, 31); + + // 32-63: f0-31, the 4-byte floating-point registers + AssignToArrayRange(Builder, Address, Four8, 32, 63); + + // Y = 64 + // PSR = 65 + // WIM = 66 + // TBR = 67 + // PC = 68 + // NPC = 69 + // FSR = 70 + // CSR = 71 + AssignToArrayRange(Builder, Address, Eight8, 64, 71); + + // 72-87: d0-15, the 8-byte floating-point registers + AssignToArrayRange(Builder, Address, Eight8, 72, 87); + + return false; +} + //===----------------------------------------------------------------------===// -// Xcore ABI Implementation +// XCore ABI Implementation //===----------------------------------------------------------------------===// + namespace { + +/// A SmallStringEnc instance is used to build up the TypeString by passing +/// it by reference between functions that append to it. +typedef llvm::SmallString<128> SmallStringEnc; + +/// TypeStringCache caches the meta encodings of Types. +/// +/// The reason for caching TypeStrings is two fold: +/// 1. To cache a type's encoding for later uses; +/// 2. As a means to break recursive member type inclusion. +/// +/// A cache Entry can have a Status of: +/// NonRecursive: The type encoding is not recursive; +/// Recursive: The type encoding is recursive; +/// Incomplete: An incomplete TypeString; +/// IncompleteUsed: An incomplete TypeString that has been used in a +/// Recursive type encoding. +/// +/// A NonRecursive entry will have all of its sub-members expanded as fully +/// as possible. Whilst it may contain types which are recursive, the type +/// itself is not recursive and thus its encoding may be safely used whenever +/// the type is encountered. +/// +/// A Recursive entry will have all of its sub-members expanded as fully as +/// possible. The type itself is recursive and it may contain other types which +/// are recursive. 
The Recursive encoding must not be used during the expansion +/// of a recursive type's recursive branch. For simplicity the code uses +/// IncompleteCount to reject all usage of Recursive encodings for member types. +/// +/// An Incomplete entry is always a RecordType and only encodes its +/// identifier e.g. "s(S){}". Incomplete 'StubEnc' entries are ephemeral and +/// are placed into the cache during type expansion as a means to identify and +/// handle recursive inclusion of types as sub-members. If there is recursion +/// the entry becomes IncompleteUsed. +/// +/// During the expansion of a RecordType's members: +/// +/// If the cache contains a NonRecursive encoding for the member type, the +/// cached encoding is used; +/// +/// If the cache contains a Recursive encoding for the member type, the +/// cached encoding is 'Swapped' out, as it may be incorrect, and... +/// +/// If the member is a RecordType, an Incomplete encoding is placed into the +/// cache to break potential recursive inclusion of itself as a sub-member; +/// +/// Once a member RecordType has been expanded, its temporary incomplete +/// entry is removed from the cache. If a Recursive encoding was swapped out +/// it is swapped back in; +/// +/// If an incomplete entry is used to expand a sub-member, the incomplete +/// entry is marked as IncompleteUsed. The cache keeps count of how many +/// IncompleteUsed entries it currently contains in IncompleteUsedCount; +/// +/// If a member's encoding is found to be a NonRecursive or Recursive viz: +/// IncompleteUsedCount==0, the member's encoding is added to the cache. +/// Else the member is part of a recursive type and thus the recursion has +/// been exited too soon for the encoding to be correct for the member. +/// +class TypeStringCache { + enum Status {NonRecursive, Recursive, Incomplete, IncompleteUsed}; + struct Entry { + std::string Str; // The encoded TypeString for the type. + enum Status State; // Information about the encoding in 'Str'. + std::string Swapped; // A temporary place holder for a Recursive encoding + // during the expansion of RecordType's members. + }; + std::map<const IdentifierInfo *, struct Entry> Map; + unsigned IncompleteCount; // Number of Incomplete entries in the Map. + unsigned IncompleteUsedCount; // Number of IncompleteUsed entries in the Map. +public: + TypeStringCache() : IncompleteCount(0), IncompleteUsedCount(0) {}; + void addIncomplete(const IdentifierInfo *ID, std::string StubEnc); + bool removeIncomplete(const IdentifierInfo *ID); + void addIfComplete(const IdentifierInfo *ID, StringRef Str, + bool IsRecursive); + StringRef lookupStr(const IdentifierInfo *ID); +}; + +/// TypeString encodings for enum & union fields must be order. +/// FieldEncoding is a helper for this ordering process. 
+class FieldEncoding { + bool HasName; + std::string Enc; +public: + FieldEncoding(bool b, SmallStringEnc &e) : HasName(b), Enc(e.c_str()) {}; + StringRef str() {return Enc.c_str();}; + bool operator<(const FieldEncoding &rhs) const { + if (HasName != rhs.HasName) return HasName; + return Enc < rhs.Enc; + } +}; + class XCoreABIInfo : public DefaultABIInfo { public: XCoreABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} - virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CGF) const; + llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const override; }; -class XcoreTargetCodeGenInfo : public TargetCodeGenInfo { +class XCoreTargetCodeGenInfo : public TargetCodeGenInfo { + mutable TypeStringCache TSC; public: - XcoreTargetCodeGenInfo(CodeGenTypes &CGT) + XCoreTargetCodeGenInfo(CodeGenTypes &CGT) :TargetCodeGenInfo(new XCoreABIInfo(CGT)) {} + void emitTargetMD(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &M) const override; }; + } // End anonymous namespace. llvm::Value *XCoreABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, @@ -5471,6 +6242,7 @@ llvm::Value *XCoreABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, uint64_t ArgSize = 0; switch (AI.getKind()) { case ABIArgInfo::Expand: + case ABIArgInfo::InAlloca: llvm_unreachable("Unsupported ABI kind for va_arg"); case ABIArgInfo::Ignore: Val = llvm::UndefValue::get(ArgPtrTy); @@ -5500,6 +6272,461 @@ llvm::Value *XCoreABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, return Val; } +/// During the expansion of a RecordType, an incomplete TypeString is placed +/// into the cache as a means to identify and break recursion. +/// If there is a Recursive encoding in the cache, it is swapped out and will +/// be reinserted by removeIncomplete(). +/// All other types of encoding should have been used rather than arriving here. +void TypeStringCache::addIncomplete(const IdentifierInfo *ID, + std::string StubEnc) { + if (!ID) + return; + Entry &E = Map[ID]; + assert( (E.Str.empty() || E.State == Recursive) && + "Incorrectly use of addIncomplete"); + assert(!StubEnc.empty() && "Passing an empty string to addIncomplete()"); + E.Swapped.swap(E.Str); // swap out the Recursive + E.Str.swap(StubEnc); + E.State = Incomplete; + ++IncompleteCount; +} + +/// Once the RecordType has been expanded, the temporary incomplete TypeString +/// must be removed from the cache. +/// If a Recursive was swapped out by addIncomplete(), it will be replaced. +/// Returns true if the RecordType was defined recursively. +bool TypeStringCache::removeIncomplete(const IdentifierInfo *ID) { + if (!ID) + return false; + auto I = Map.find(ID); + assert(I != Map.end() && "Entry not present"); + Entry &E = I->second; + assert( (E.State == Incomplete || + E.State == IncompleteUsed) && + "Entry must be an incomplete type"); + bool IsRecursive = false; + if (E.State == IncompleteUsed) { + // We made use of our Incomplete encoding, thus we are recursive. + IsRecursive = true; + --IncompleteUsedCount; + } + if (E.Swapped.empty()) + Map.erase(I); + else { + // Swap the Recursive back. + E.Swapped.swap(E.Str); + E.Swapped.clear(); + E.State = Recursive; + } + --IncompleteCount; + return IsRecursive; +} + +/// Add the encoded TypeString to the cache only if it is NonRecursive or +/// Recursive (viz: all sub-members were expanded as fully as possible). 
+void TypeStringCache::addIfComplete(const IdentifierInfo *ID, StringRef Str, + bool IsRecursive) { + if (!ID || IncompleteUsedCount) + return; // No key or it is is an incomplete sub-type so don't add. + Entry &E = Map[ID]; + if (IsRecursive && !E.Str.empty()) { + assert(E.State==Recursive && E.Str.size() == Str.size() && + "This is not the same Recursive entry"); + // The parent container was not recursive after all, so we could have used + // this Recursive sub-member entry after all, but we assumed the worse when + // we started viz: IncompleteCount!=0. + return; + } + assert(E.Str.empty() && "Entry already present"); + E.Str = Str.str(); + E.State = IsRecursive? Recursive : NonRecursive; +} + +/// Return a cached TypeString encoding for the ID. If there isn't one, or we +/// are recursively expanding a type (IncompleteCount != 0) and the cached +/// encoding is Recursive, return an empty StringRef. +StringRef TypeStringCache::lookupStr(const IdentifierInfo *ID) { + if (!ID) + return StringRef(); // We have no key. + auto I = Map.find(ID); + if (I == Map.end()) + return StringRef(); // We have no encoding. + Entry &E = I->second; + if (E.State == Recursive && IncompleteCount) + return StringRef(); // We don't use Recursive encodings for member types. + + if (E.State == Incomplete) { + // The incomplete type is being used to break out of recursion. + E.State = IncompleteUsed; + ++IncompleteUsedCount; + } + return E.Str.c_str(); +} + +/// The XCore ABI includes a type information section that communicates symbol +/// type information to the linker. The linker uses this information to verify +/// safety/correctness of things such as array bound and pointers et al. +/// The ABI only requires C (and XC) language modules to emit TypeStrings. +/// This type information (TypeString) is emitted into meta data for all global +/// symbols: definitions, declarations, functions & variables. +/// +/// The TypeString carries type, qualifier, name, size & value details. +/// Please see 'Tools Development Guide' section 2.16.2 for format details: +/// <https://www.xmos.com/download/public/Tools-Development-Guide%28X9114A%29.pdf> +/// The output is tested by test/CodeGen/xcore-stringtype.c. +/// +static bool getTypeString(SmallStringEnc &Enc, const Decl *D, + CodeGen::CodeGenModule &CGM, TypeStringCache &TSC); + +/// XCore uses emitTargetMD to emit TypeString metadata for global symbols. +void XCoreTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const { + SmallStringEnc Enc; + if (getTypeString(Enc, D, CGM, TSC)) { + llvm::LLVMContext &Ctx = CGM.getModule().getContext(); + llvm::SmallVector<llvm::Value *, 2> MDVals; + MDVals.push_back(GV); + MDVals.push_back(llvm::MDString::get(Ctx, Enc.str())); + llvm::NamedMDNode *MD = + CGM.getModule().getOrInsertNamedMetadata("xcore.typestrings"); + MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); + } +} + +static bool appendType(SmallStringEnc &Enc, QualType QType, + const CodeGen::CodeGenModule &CGM, + TypeStringCache &TSC); + +/// Helper function for appendRecordType(). +/// Builds a SmallVector containing the encoded field types in declaration order. 
+static bool extractFieldType(SmallVectorImpl<FieldEncoding> &FE, + const RecordDecl *RD, + const CodeGen::CodeGenModule &CGM, + TypeStringCache &TSC) { + for (RecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end(); + I != E; ++I) { + SmallStringEnc Enc; + Enc += "m("; + Enc += I->getName(); + Enc += "){"; + if (I->isBitField()) { + Enc += "b("; + llvm::raw_svector_ostream OS(Enc); + OS.resync(); + OS << I->getBitWidthValue(CGM.getContext()); + OS.flush(); + Enc += ':'; + } + if (!appendType(Enc, I->getType(), CGM, TSC)) + return false; + if (I->isBitField()) + Enc += ')'; + Enc += '}'; + FE.push_back(FieldEncoding(!I->getName().empty(), Enc)); + } + return true; +} + +/// Appends structure and union types to Enc and adds encoding to cache. +/// Recursively calls appendType (via extractFieldType) for each field. +/// Union types have their fields ordered according to the ABI. +static bool appendRecordType(SmallStringEnc &Enc, const RecordType *RT, + const CodeGen::CodeGenModule &CGM, + TypeStringCache &TSC, const IdentifierInfo *ID) { + // Append the cached TypeString if we have one. + StringRef TypeString = TSC.lookupStr(ID); + if (!TypeString.empty()) { + Enc += TypeString; + return true; + } + + // Start to emit an incomplete TypeString. + size_t Start = Enc.size(); + Enc += (RT->isUnionType()? 'u' : 's'); + Enc += '('; + if (ID) + Enc += ID->getName(); + Enc += "){"; + + // We collect all encoded fields and order as necessary. + bool IsRecursive = false; + const RecordDecl *RD = RT->getDecl()->getDefinition(); + if (RD && !RD->field_empty()) { + // An incomplete TypeString stub is placed in the cache for this RecordType + // so that recursive calls to this RecordType will use it whilst building a + // complete TypeString for this RecordType. + SmallVector<FieldEncoding, 16> FE; + std::string StubEnc(Enc.substr(Start).str()); + StubEnc += '}'; // StubEnc now holds a valid incomplete TypeString. + TSC.addIncomplete(ID, std::move(StubEnc)); + if (!extractFieldType(FE, RD, CGM, TSC)) { + (void) TSC.removeIncomplete(ID); + return false; + } + IsRecursive = TSC.removeIncomplete(ID); + // The ABI requires unions to be sorted but not structures. + // See FieldEncoding::operator< for sort algorithm. + if (RT->isUnionType()) + std::sort(FE.begin(), FE.end()); + // We can now complete the TypeString. + unsigned E = FE.size(); + for (unsigned I = 0; I != E; ++I) { + if (I) + Enc += ','; + Enc += FE[I].str(); + } + } + Enc += '}'; + TSC.addIfComplete(ID, Enc.substr(Start), IsRecursive); + return true; +} + +/// Appends enum types to Enc and adds the encoding to the cache. +static bool appendEnumType(SmallStringEnc &Enc, const EnumType *ET, + TypeStringCache &TSC, + const IdentifierInfo *ID) { + // Append the cached TypeString if we have one. + StringRef TypeString = TSC.lookupStr(ID); + if (!TypeString.empty()) { + Enc += TypeString; + return true; + } + + size_t Start = Enc.size(); + Enc += "e("; + if (ID) + Enc += ID->getName(); + Enc += "){"; + + // We collect all encoded enumerations and order them alphanumerically. 
+ if (const EnumDecl *ED = ET->getDecl()->getDefinition()) { + SmallVector<FieldEncoding, 16> FE; + for (auto I = ED->enumerator_begin(), E = ED->enumerator_end(); I != E; + ++I) { + SmallStringEnc EnumEnc; + EnumEnc += "m("; + EnumEnc += I->getName(); + EnumEnc += "){"; + I->getInitVal().toString(EnumEnc); + EnumEnc += '}'; + FE.push_back(FieldEncoding(!I->getName().empty(), EnumEnc)); + } + std::sort(FE.begin(), FE.end()); + unsigned E = FE.size(); + for (unsigned I = 0; I != E; ++I) { + if (I) + Enc += ','; + Enc += FE[I].str(); + } + } + Enc += '}'; + TSC.addIfComplete(ID, Enc.substr(Start), false); + return true; +} + +/// Appends type's qualifier to Enc. +/// This is done prior to appending the type's encoding. +static void appendQualifier(SmallStringEnc &Enc, QualType QT) { + // Qualifiers are emitted in alphabetical order. + static const char *Table[] = {"","c:","r:","cr:","v:","cv:","rv:","crv:"}; + int Lookup = 0; + if (QT.isConstQualified()) + Lookup += 1<<0; + if (QT.isRestrictQualified()) + Lookup += 1<<1; + if (QT.isVolatileQualified()) + Lookup += 1<<2; + Enc += Table[Lookup]; +} + +/// Appends built-in types to Enc. +static bool appendBuiltinType(SmallStringEnc &Enc, const BuiltinType *BT) { + const char *EncType; + switch (BT->getKind()) { + case BuiltinType::Void: + EncType = "0"; + break; + case BuiltinType::Bool: + EncType = "b"; + break; + case BuiltinType::Char_U: + EncType = "uc"; + break; + case BuiltinType::UChar: + EncType = "uc"; + break; + case BuiltinType::SChar: + EncType = "sc"; + break; + case BuiltinType::UShort: + EncType = "us"; + break; + case BuiltinType::Short: + EncType = "ss"; + break; + case BuiltinType::UInt: + EncType = "ui"; + break; + case BuiltinType::Int: + EncType = "si"; + break; + case BuiltinType::ULong: + EncType = "ul"; + break; + case BuiltinType::Long: + EncType = "sl"; + break; + case BuiltinType::ULongLong: + EncType = "ull"; + break; + case BuiltinType::LongLong: + EncType = "sll"; + break; + case BuiltinType::Float: + EncType = "ft"; + break; + case BuiltinType::Double: + EncType = "d"; + break; + case BuiltinType::LongDouble: + EncType = "ld"; + break; + default: + return false; + } + Enc += EncType; + return true; +} + +/// Appends a pointer encoding to Enc before calling appendType for the pointee. +static bool appendPointerType(SmallStringEnc &Enc, const PointerType *PT, + const CodeGen::CodeGenModule &CGM, + TypeStringCache &TSC) { + Enc += "p("; + if (!appendType(Enc, PT->getPointeeType(), CGM, TSC)) + return false; + Enc += ')'; + return true; +} + +/// Appends array encoding to Enc before calling appendType for the element. +static bool appendArrayType(SmallStringEnc &Enc, QualType QT, + const ArrayType *AT, + const CodeGen::CodeGenModule &CGM, + TypeStringCache &TSC, StringRef NoSizeEnc) { + if (AT->getSizeModifier() != ArrayType::Normal) + return false; + Enc += "a("; + if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT)) + CAT->getSize().toStringUnsigned(Enc); + else + Enc += NoSizeEnc; // Global arrays use "*", otherwise it is "". + Enc += ':'; + // The Qualifiers should be attached to the type rather than the array. + appendQualifier(Enc, QT); + if (!appendType(Enc, AT->getElementType(), CGM, TSC)) + return false; + Enc += ')'; + return true; +} + +/// Appends a function encoding to Enc, calling appendType for the return type +/// and the arguments. 
+static bool appendFunctionType(SmallStringEnc &Enc, const FunctionType *FT, + const CodeGen::CodeGenModule &CGM, + TypeStringCache &TSC) { + Enc += "f{"; + if (!appendType(Enc, FT->getReturnType(), CGM, TSC)) + return false; + Enc += "}("; + if (const FunctionProtoType *FPT = FT->getAs<FunctionProtoType>()) { + // N.B. we are only interested in the adjusted param types. + auto I = FPT->param_type_begin(); + auto E = FPT->param_type_end(); + if (I != E) { + do { + if (!appendType(Enc, *I, CGM, TSC)) + return false; + ++I; + if (I != E) + Enc += ','; + } while (I != E); + if (FPT->isVariadic()) + Enc += ",va"; + } else { + if (FPT->isVariadic()) + Enc += "va"; + else + Enc += '0'; + } + } + Enc += ')'; + return true; +} + +/// Handles the type's qualifier before dispatching a call to handle specific +/// type encodings. +static bool appendType(SmallStringEnc &Enc, QualType QType, + const CodeGen::CodeGenModule &CGM, + TypeStringCache &TSC) { + + QualType QT = QType.getCanonicalType(); + + if (const ArrayType *AT = QT->getAsArrayTypeUnsafe()) + // The Qualifiers should be attached to the type rather than the array. + // Thus we don't call appendQualifier() here. + return appendArrayType(Enc, QT, AT, CGM, TSC, ""); + + appendQualifier(Enc, QT); + + if (const BuiltinType *BT = QT->getAs<BuiltinType>()) + return appendBuiltinType(Enc, BT); + + if (const PointerType *PT = QT->getAs<PointerType>()) + return appendPointerType(Enc, PT, CGM, TSC); + + if (const EnumType *ET = QT->getAs<EnumType>()) + return appendEnumType(Enc, ET, TSC, QT.getBaseTypeIdentifier()); + + if (const RecordType *RT = QT->getAsStructureType()) + return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier()); + + if (const RecordType *RT = QT->getAsUnionType()) + return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier()); + + if (const FunctionType *FT = QT->getAs<FunctionType>()) + return appendFunctionType(Enc, FT, CGM, TSC); + + return false; +} + +static bool getTypeString(SmallStringEnc &Enc, const Decl *D, + CodeGen::CodeGenModule &CGM, TypeStringCache &TSC) { + if (!D) + return false; + + if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) { + if (FD->getLanguageLinkage() != CLanguageLinkage) + return false; + return appendType(Enc, FD->getType(), CGM, TSC); + } + + if (const VarDecl *VD = dyn_cast<VarDecl>(D)) { + if (VD->getLanguageLinkage() != CLanguageLinkage) + return false; + QualType QT = VD->getType().getCanonicalType(); + if (const ArrayType *AT = QT->getAsArrayTypeUnsafe()) { + // Global ArrayTypes are given a size of '*' if the size is unknown. + // The Qualifiers should be attached to the type rather than the array. + // Thus we don't call appendQualifier() here. 
+ return appendArrayType(Enc, QT, AT, CGM, TSC, "*"); + } + return appendType(Enc, QT, CGM, TSC); + } + return false; +} + + //===----------------------------------------------------------------------===// // Driver code //===----------------------------------------------------------------------===// @@ -5524,13 +6751,23 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, false)); case llvm::Triple::aarch64: - return *(TheTargetCodeGenInfo = new AArch64TargetCodeGenInfo(Types)); + case llvm::Triple::aarch64_be: + case llvm::Triple::arm64: + case llvm::Triple::arm64_be: { + AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS; + if (getTarget().getABI() == "darwinpcs") + Kind = AArch64ABIInfo::DarwinPCS; + + return *(TheTargetCodeGenInfo = new AArch64TargetCodeGenInfo(Types, Kind)); + } case llvm::Triple::arm: + case llvm::Triple::armeb: case llvm::Triple::thumb: + case llvm::Triple::thumbeb: { ARMABIInfo::ABIKind Kind = ARMABIInfo::AAPCS; - if (strcmp(getTarget().getABI(), "apcs-gnu") == 0) + if (getTarget().getABI() == "apcs-gnu") Kind = ARMABIInfo::APCS; else if (CodeGenOpts.FloatABI == "hard" || (CodeGenOpts.FloatABI != "soft" && @@ -5550,13 +6787,20 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { case llvm::Triple::ppc: return *(TheTargetCodeGenInfo = new PPC32TargetCodeGenInfo(Types)); case llvm::Triple::ppc64: - if (Triple.isOSBinFormatELF()) - return *(TheTargetCodeGenInfo = new PPC64_SVR4_TargetCodeGenInfo(Types)); - else + if (Triple.isOSBinFormatELF()) { + // FIXME: Should be switchable via command-line option. + PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1; + return *(TheTargetCodeGenInfo = + new PPC64_SVR4_TargetCodeGenInfo(Types, Kind)); + } else return *(TheTargetCodeGenInfo = new PPC64TargetCodeGenInfo(Types)); - case llvm::Triple::ppc64le: + case llvm::Triple::ppc64le: { assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!"); - return *(TheTargetCodeGenInfo = new PPC64_SVR4_TargetCodeGenInfo(Types)); + // FIXME: Should be switchable via command-line option. 
+ PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv2; + return *(TheTargetCodeGenInfo = + new PPC64_SVR4_TargetCodeGenInfo(Types, Kind)); + } case llvm::Triple::nvptx: case llvm::Triple::nvptx64: @@ -5575,7 +6819,7 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { bool IsDarwinVectorABI = Triple.isOSDarwin(); bool IsSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts); - bool IsWin32FloatStructABI = (Triple.getOS() == llvm::Triple::Win32); + bool IsWin32FloatStructABI = Triple.isWindowsMSVCEnvironment(); if (Triple.getOS() == llvm::Triple::Win32) { return *(TheTargetCodeGenInfo = @@ -5593,12 +6837,10 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { } case llvm::Triple::x86_64: { - bool HasAVX = strcmp(getTarget().getABI(), "avx") == 0; + bool HasAVX = getTarget().getABI() == "avx"; switch (Triple.getOS()) { case llvm::Triple::Win32: - case llvm::Triple::MinGW32: - case llvm::Triple::Cygwin: return *(TheTargetCodeGenInfo = new WinX86_64TargetCodeGenInfo(Types)); case llvm::Triple::NaCl: return *(TheTargetCodeGenInfo = new NaClX86_64TargetCodeGenInfo(Types, @@ -5613,7 +6855,6 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { case llvm::Triple::sparcv9: return *(TheTargetCodeGenInfo = new SparcV9TargetCodeGenInfo(Types)); case llvm::Triple::xcore: - return *(TheTargetCodeGenInfo = new XcoreTargetCodeGenInfo(Types)); - + return *(TheTargetCodeGenInfo = new XCoreTargetCodeGenInfo(Types)); } } diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h index f631f3102d0d..2616820185f5 100644 --- a/lib/CodeGen/TargetInfo.h +++ b/lib/CodeGen/TargetInfo.h @@ -17,189 +17,199 @@ #include "clang/AST/Type.h" #include "clang/Basic/LLVM.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" namespace llvm { - class Constant; - class GlobalValue; - class Type; - class Value; +class Constant; +class GlobalValue; +class Type; +class Value; } namespace clang { - class ABIInfo; - class Decl; - - namespace CodeGen { - class CallArgList; - class CodeGenModule; - class CodeGenFunction; - class CGFunctionInfo; +class ABIInfo; +class Decl; + +namespace CodeGen { +class CallArgList; +class CodeGenModule; +class CodeGenFunction; +class CGFunctionInfo; +} + +/// TargetCodeGenInfo - This class organizes various target-specific +/// codegeneration issues, like target-specific attributes, builtins and so +/// on. +class TargetCodeGenInfo { + ABIInfo *Info; + +public: + // WARNING: Acquires the ownership of ABIInfo. + TargetCodeGenInfo(ABIInfo *info = 0) : Info(info) {} + virtual ~TargetCodeGenInfo(); + + /// getABIInfo() - Returns ABI info helper for the target. + const ABIInfo &getABIInfo() const { return *Info; } + + /// SetTargetAttributes - Provides a convenient hook to handle extra + /// target-specific attributes for the given global. + virtual void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &M) const {} + + /// EmitTargetMD - Provides a convenient hook to handle extra + /// target-specific metadata for the given global. + virtual void emitTargetMD(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &M) const {} + + /// Determines the size of struct _Unwind_Exception on this platform, + /// in 8-bit units. 
The Itanium ABI defines this as: + /// struct _Unwind_Exception { + /// uint64 exception_class; + /// _Unwind_Exception_Cleanup_Fn exception_cleanup; + /// uint64 private_1; + /// uint64 private_2; + /// }; + virtual unsigned getSizeOfUnwindException() const; + + /// Controls whether __builtin_extend_pointer should sign-extend + /// pointers to uint64_t or zero-extend them (the default). Has + /// no effect for targets: + /// - that have 64-bit pointers, or + /// - that cannot address through registers larger than pointers, or + /// - that implicitly ignore/truncate the top bits when addressing + /// through such registers. + virtual bool extendPointerWithSExt() const { return false; } + + /// Determines the DWARF register number for the stack pointer, for + /// exception-handling purposes. Implements __builtin_dwarf_sp_column. + /// + /// Returns -1 if the operation is unsupported by this target. + virtual int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const { + return -1; + } + + /// Initializes the given DWARF EH register-size table, a char*. + /// Implements __builtin_init_dwarf_reg_size_table. + /// + /// Returns true if the operation is unsupported by this target. + virtual bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const { + return true; + } + + /// Performs the code-generation required to convert a return + /// address as stored by the system into the actual address of the + /// next instruction that will be executed. + /// + /// Used by __builtin_extract_return_addr(). + virtual llvm::Value *decodeReturnAddress(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const { + return Address; + } + + /// Performs the code-generation required to convert the address + /// of an instruction into a return address suitable for storage + /// by the system in a return slot. + /// + /// Used by __builtin_frob_return_addr(). + virtual llvm::Value *encodeReturnAddress(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const { + return Address; + } + + /// Corrects the low-level LLVM type for a given constraint and "usual" + /// type. + /// + /// \returns A pointer to a new LLVM type, possibly the same as the original + /// on success; 0 on failure. + virtual llvm::Type *adjustInlineAsmType(CodeGen::CodeGenFunction &CGF, + StringRef Constraint, + llvm::Type *Ty) const { + return Ty; + } + + /// doesReturnSlotInterfereWithArgs - Return true if the target uses an + /// argument slot for an 'sret' type. + virtual bool doesReturnSlotInterfereWithArgs() const { return true; } + + /// Retrieve the address of a function to call immediately before + /// calling objc_retainAutoreleasedReturnValue. The + /// implementation of objc_autoreleaseReturnValue sniffs the + /// instruction stream following its return address to decide + /// whether it's a call to objc_retainAutoreleasedReturnValue. + /// This can be prohibitively expensive, depending on the + /// relocation model, and so on some targets it instead sniffs for + /// a particular instruction sequence. This functions returns + /// that instruction sequence in inline assembly, which will be + /// empty if none is required. + virtual StringRef getARCRetainAutoreleasedReturnValueMarker() const { + return ""; + } + + /// Return a constant used by UBSan as a signature to identify functions + /// possessing type information, or 0 if the platform is unsupported. 
+ virtual llvm::Constant * + getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const { + return nullptr; } - /// TargetCodeGenInfo - This class organizes various target-specific - /// codegeneration issues, like target-specific attributes, builtins and so - /// on. - class TargetCodeGenInfo { - ABIInfo *Info; - public: - // WARNING: Acquires the ownership of ABIInfo. - TargetCodeGenInfo(ABIInfo *info = 0):Info(info) { } - virtual ~TargetCodeGenInfo(); - - /// getABIInfo() - Returns ABI info helper for the target. - const ABIInfo& getABIInfo() const { return *Info; } - - /// SetTargetAttributes - Provides a convenient hook to handle extra - /// target-specific attributes for the given global. - virtual void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const { } - - /// Determines the size of struct _Unwind_Exception on this platform, - /// in 8-bit units. The Itanium ABI defines this as: - /// struct _Unwind_Exception { - /// uint64 exception_class; - /// _Unwind_Exception_Cleanup_Fn exception_cleanup; - /// uint64 private_1; - /// uint64 private_2; - /// }; - virtual unsigned getSizeOfUnwindException() const; - - /// Controls whether __builtin_extend_pointer should sign-extend - /// pointers to uint64_t or zero-extend them (the default). Has - /// no effect for targets: - /// - that have 64-bit pointers, or - /// - that cannot address through registers larger than pointers, or - /// - that implicitly ignore/truncate the top bits when addressing - /// through such registers. - virtual bool extendPointerWithSExt() const { return false; } - - /// Determines the DWARF register number for the stack pointer, for - /// exception-handling purposes. Implements __builtin_dwarf_sp_column. - /// - /// Returns -1 if the operation is unsupported by this target. - virtual int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const { - return -1; - } - - /// Initializes the given DWARF EH register-size table, a char*. - /// Implements __builtin_init_dwarf_reg_size_table. - /// - /// Returns true if the operation is unsupported by this target. - virtual bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const { - return true; - } - - /// Performs the code-generation required to convert a return - /// address as stored by the system into the actual address of the - /// next instruction that will be executed. - /// - /// Used by __builtin_extract_return_addr(). - virtual llvm::Value *decodeReturnAddress(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const { - return Address; - } - - /// Performs the code-generation required to convert the address - /// of an instruction into a return address suitable for storage - /// by the system in a return slot. - /// - /// Used by __builtin_frob_return_addr(). - virtual llvm::Value *encodeReturnAddress(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const { - return Address; - } - - /// Corrects the low-level LLVM type for a given constraint and "usual" - /// type. - /// - /// \returns A pointer to a new LLVM type, possibly the same as the original - /// on success; 0 on failure. - virtual llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF, - StringRef Constraint, - llvm::Type* Ty) const { - return Ty; - } - - /// Retrieve the address of a function to call immediately before - /// calling objc_retainAutoreleasedReturnValue. 
The - /// implementation of objc_autoreleaseReturnValue sniffs the - /// instruction stream following its return address to decide - /// whether it's a call to objc_retainAutoreleasedReturnValue. - /// This can be prohibitively expensive, depending on the - /// relocation model, and so on some targets it instead sniffs for - /// a particular instruction sequence. This functions returns - /// that instruction sequence in inline assembly, which will be - /// empty if none is required. - virtual StringRef getARCRetainAutoreleasedReturnValueMarker() const { - return ""; - } - - /// Return a constant used by UBSan as a signature to identify functions - /// possessing type information, or 0 if the platform is unsupported. - virtual llvm::Constant *getUBSanFunctionSignature( - CodeGen::CodeGenModule &CGM) const { - return 0; - } - - /// Determine whether a call to an unprototyped functions under - /// the given calling convention should use the variadic - /// convention or the non-variadic convention. - /// - /// There's a good reason to make a platform's variadic calling - /// convention be different from its non-variadic calling - /// convention: the non-variadic arguments can be passed in - /// registers (better for performance), and the variadic arguments - /// can be passed on the stack (also better for performance). If - /// this is done, however, unprototyped functions *must* use the - /// non-variadic convention, because C99 states that a call - /// through an unprototyped function type must succeed if the - /// function was defined with a non-variadic prototype with - /// compatible parameters. Therefore, splitting the conventions - /// makes it impossible to call a variadic function through an - /// unprototyped type. Since function prototypes came out in the - /// late 1970s, this is probably an acceptable trade-off. - /// Nonetheless, not all platforms are willing to make it, and in - /// particularly x86-64 bends over backwards to make the - /// conventions compatible. - /// - /// The default is false. This is correct whenever: - /// - the conventions are exactly the same, because it does not - /// matter and the resulting IR will be somewhat prettier in - /// certain cases; or - /// - the conventions are substantively different in how they pass - /// arguments, because in this case using the variadic convention - /// will lead to C99 violations. - /// - /// However, some platforms make the conventions identical except - /// for passing additional out-of-band information to a variadic - /// function: for example, x86-64 passes the number of SSE - /// arguments in %al. On these platforms, it is desireable to - /// call unprototyped functions using the variadic convention so - /// that unprototyped calls to varargs functions still succeed. - /// - /// Relatedly, platforms which pass the fixed arguments to this: - /// A foo(B, C, D); - /// differently than they would pass them to this: - /// A foo(B, C, D, ...); - /// may need to adjust the debugger-support code in Sema to do the - /// right thing when calling a function with no know signature. - virtual bool isNoProtoCallVariadic(const CodeGen::CallArgList &args, - const FunctionNoProtoType *fnType) const; - - /// Gets the linker options necessary to link a dependent library on this - /// platform. - virtual void getDependentLibraryOption(llvm::StringRef Lib, - llvm::SmallString<24> &Opt) const; - - /// Gets the linker options necessary to detect object file mismatches on - /// this platform. 
- virtual void getDetectMismatchOption(llvm::StringRef Name, - llvm::StringRef Value, - llvm::SmallString<32> &Opt) const {} - }; + /// Determine whether a call to an unprototyped functions under + /// the given calling convention should use the variadic + /// convention or the non-variadic convention. + /// + /// There's a good reason to make a platform's variadic calling + /// convention be different from its non-variadic calling + /// convention: the non-variadic arguments can be passed in + /// registers (better for performance), and the variadic arguments + /// can be passed on the stack (also better for performance). If + /// this is done, however, unprototyped functions *must* use the + /// non-variadic convention, because C99 states that a call + /// through an unprototyped function type must succeed if the + /// function was defined with a non-variadic prototype with + /// compatible parameters. Therefore, splitting the conventions + /// makes it impossible to call a variadic function through an + /// unprototyped type. Since function prototypes came out in the + /// late 1970s, this is probably an acceptable trade-off. + /// Nonetheless, not all platforms are willing to make it, and in + /// particularly x86-64 bends over backwards to make the + /// conventions compatible. + /// + /// The default is false. This is correct whenever: + /// - the conventions are exactly the same, because it does not + /// matter and the resulting IR will be somewhat prettier in + /// certain cases; or + /// - the conventions are substantively different in how they pass + /// arguments, because in this case using the variadic convention + /// will lead to C99 violations. + /// + /// However, some platforms make the conventions identical except + /// for passing additional out-of-band information to a variadic + /// function: for example, x86-64 passes the number of SSE + /// arguments in %al. On these platforms, it is desirable to + /// call unprototyped functions using the variadic convention so + /// that unprototyped calls to varargs functions still succeed. + /// + /// Relatedly, platforms which pass the fixed arguments to this: + /// A foo(B, C, D); + /// differently than they would pass them to this: + /// A foo(B, C, D, ...); + /// may need to adjust the debugger-support code in Sema to do the + /// right thing when calling a function with no know signature. + virtual bool isNoProtoCallVariadic(const CodeGen::CallArgList &args, + const FunctionNoProtoType *fnType) const; + + /// Gets the linker options necessary to link a dependent library on this + /// platform. + virtual void getDependentLibraryOption(llvm::StringRef Lib, + llvm::SmallString<24> &Opt) const; + + /// Gets the linker options necessary to detect object file mismatches on + /// this platform. + virtual void getDetectMismatchOption(llvm::StringRef Name, + llvm::StringRef Value, + llvm::SmallString<32> &Opt) const {} +}; } #endif // CLANG_CODEGEN_TARGETINFO_H |
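
A few illustrative sketches follow for the target-specific hooks touched above; none of this text is part of the commit. First, the new NVPTXTargetCodeGenInfo::addNVVMMetadata() path for CUDAGlobalAttr and CUDALaunchBoundsAttr: a minimal CUDA example (kernel name and bounds are invented) and the nvvm.annotations nodes the hunk's own comments say it emits.

// Hypothetical CUDA translation unit:
__global__ void __launch_bounds__(256, 4) kern(float *p) { *p = 0.f; }

// Expected nvvm.annotations operands, in the notation used by the comments
// in the hunk above:
//   !{<func-ref kern>, metadata !"kernel",   i32 1}
//   !{<func-ref kern>, metadata !"maxntidx", i32 256}
//   !{<func-ref kern>, metadata !"minctasm", i32 4}
// With __launch_bounds__(256) alone, getMinBlocks() returns its default of
// zero and no "minctasm" node is added.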
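
The MSP430 hunk switches "Step 3: Emit ISR vector alias" over to llvm::GlobalAlias::create(). A sketch of the source-level effect, assuming the GNU-style interrupt(N) attribute spelling and an invented handler name:

/* Hypothetical MSP430 C source: */
void __attribute__((interrupt(4))) timer_isr(void) {}
/* Per the hunk above, the vector number is halved and an externally visible
 * alias is created for the handler:  __isr_2 = timer_isr                    */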
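
The new SparcV9TargetCodeGenInfo::initDwarfEHRegSizeTable() feeds __builtin_init_dwarf_reg_size_table(), which unwinder code uses to learn the byte size of each DWARF register. A sketch of what a caller would observe, derived from the AssignToArrayRange calls in the hunk; the table size of 88 is an assumption that covers only the registers initialized there.

/* Illustrative C only: */
unsigned char reg_size[88];

void init_sizes(void) {
  __builtin_init_dwarf_reg_size_table(reg_size);
  /* reg_size[0..31]  == 8   8-byte general-purpose registers      */
  /* reg_size[32..63] == 4   %f0-%f31, 4-byte FP registers         */
  /* reg_size[64..71] == 8   Y, PSR, WIM, TBR, PC, NPC, FSR, CSR   */
  /* reg_size[72..87] == 8   %d0-%d15, 8-byte FP registers         */
}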
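
The XCore emitTargetMD() hook added above attaches a TypeString to each C-linkage global via the "xcore.typestrings" named metadata. The encodings below are one reading of the appendType()/appendRecordType()/appendFunctionType() helpers in this hunk, not quotations from the XMOS Tools Development Guide, and the declaration names are invented; treat them as a sketch.

/* Hypothetical C-linkage declarations and the TypeString each should get,
 * per the encoder added in this commit; emitTargetMD() records each as a
 * {GlobalValue, MDString} operand of "xcore.typestrings":                 */
struct S { int a; unsigned char *p; } s;  /* s(S){m(a){si},m(p){p(uc)}}    */
int g[10];                                /* a(10:si)                      */
extern int h[];                           /* a(*:si)  ("*" = unknown size) */
int f(const unsigned char *str, ...);     /* f{si}(p(c:uc),va)             */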
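
Finally, the long comment above isNoProtoCallVariadic() in TargetInfo.h rests on a C99 rule about calls through unprototyped function types. A small pre-C23 C example of that constraint (pre-C23 because an empty parameter list still means "unprototyped" there); the function names are made up.

/* C99 6.5.2.2: a call through an unprototyped function type must still work
 * when the callee was defined with a compatible non-variadic prototype, so
 * unprototyped calls generally have to use the non-variadic convention.    */
int add(int a, int b) { return a + b; }      /* non-variadic definition      */

int call_unprototyped(void) {
  int (*fp)() = (int (*)())add;              /* unprototyped function type   */
  return fp(1, 2);                           /* must behave like add(1, 2)   */
}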