100 files changed, 1115 insertions, 1533 deletions
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index ee55c52df6e7..dae8e758c7cf 100755 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -334,11 +334,6 @@ function(llvm_add_library name)          PREFIX ""          )      endif() -    if (MSVC) -      set_target_properties(${name} -        PROPERTIES -        IMPORT_SUFFIX ".imp") -    endif ()    endif()    if(ARG_MODULE OR ARG_SHARED) diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index f27fb0277085..107f9d297d0f 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -391,7 +391,8 @@ During this release the MIPS target has reached a few major milestones. The  compiler has gained support for MIPS-II and MIPS-III; become ABI-compatible  with GCC for big and little endian O32, N32, and N64; and is now able to  compile the Linux kernel for 32-bit targets. Additionally, LLD now supports -microMIPS for the O32 ABI on little endian targets. +microMIPS for the O32 ABI on little endian targets, and code generation for +microMIPS is almost completely passing the test-suite.  ABI  ^^^ diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index f0d0b2dbcdbc..584ce65c9d17 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -66,7 +66,6 @@ struct LandingPadInfo {    MachineBasicBlock *LandingPadBlock;    // Landing pad block.    SmallVector<MCSymbol*, 1> BeginLabels; // Labels prior to invoke.    SmallVector<MCSymbol*, 1> EndLabels;   // Labels after invoke. -  SmallVector<MCSymbol*, 1> ClauseLabels; // Labels for each clause.    MCSymbol *LandingPadLabel;             // Label at beginning of landing pad.    const Function *Personality;           // Personality function.    std::vector<int> TypeIds;              // List of type ids (filters negative) @@ -331,11 +330,6 @@ public:    ///    void addCleanup(MachineBasicBlock *LandingPad); -  /// Add a clause for a landing pad. Returns a new label for the clause. This -  /// is used by EH schemes that have more than one landing pad. In this case, -  /// each clause gets its own basic block. -  MCSymbol *addClauseForLandingPad(MachineBasicBlock *LandingPad); -    /// getTypeIDFor - Return the type id for the specified typeinfo.  This is    /// function wide.    unsigned getTypeIDFor(const GlobalValue *TI); diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h index eceb790c547d..5c0e9859915f 100644 --- a/include/llvm/CodeGen/RegAllocPBQP.h +++ b/include/llvm/CodeGen/RegAllocPBQP.h @@ -248,7 +248,7 @@ public:    void setReductionState(ReductionState RS) { this->RS = RS; }    void handleAddEdge(const MatrixMetadata& MD, bool Transpose) { -    DeniedOpts += Transpose ? MD.getWorstCol() : MD.getWorstRow(); +    DeniedOpts += Transpose ? MD.getWorstRow() : MD.getWorstCol();      const bool* UnsafeOpts =        Transpose ? MD.getUnsafeCols() : MD.getUnsafeRows();      for (unsigned i = 0; i < NumOpts; ++i) @@ -256,7 +256,7 @@ public:    }    void handleRemoveEdge(const MatrixMetadata& MD, bool Transpose) { -    DeniedOpts -= Transpose ? MD.getWorstCol() : MD.getWorstRow(); +    DeniedOpts -= Transpose ? MD.getWorstRow() : MD.getWorstCol();      const bool* UnsafeOpts =        Transpose ? 
MD.getUnsafeCols() : MD.getUnsafeRows();      for (unsigned i = 0; i < NumOpts; ++i) diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h index 3bf6d38d311d..27a5d6f80041 100644 --- a/include/llvm/IR/Metadata.h +++ b/include/llvm/IR/Metadata.h @@ -693,6 +693,7 @@ public:    static AAMDNodes getMostGenericAA(const AAMDNodes &A, const AAMDNodes &B);    static MDNode *getMostGenericFPMath(MDNode *A, MDNode *B);    static MDNode *getMostGenericRange(MDNode *A, MDNode *B); +  static MDNode *getMostGenericAliasScope(MDNode *A, MDNode *B);  };  /// \brief Uniquable metadata node. diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp index 58ac5d33409e..86e7fc21b950 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -52,7 +52,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {    const TargetTransformInfo &TTI;    /// The cache of @llvm.assume intrinsics. -  AssumptionCache &AC; +  AssumptionCacheTracker *ACT;    // The called function.    Function &F; @@ -146,8 +146,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {  public:    CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI, -               AssumptionCache &AC, Function &Callee, int Threshold) -      : DL(DL), TTI(TTI), AC(AC), F(Callee), Threshold(Threshold), Cost(0), +               AssumptionCacheTracker *ACT, Function &Callee, int Threshold) +      : DL(DL), TTI(TTI), ACT(ACT), F(Callee), Threshold(Threshold), Cost(0),          IsCallerRecursive(false), IsRecursiveCall(false),          ExposesReturnsTwice(false), HasDynamicAlloca(false),          ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), @@ -783,7 +783,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {    // during devirtualization and so we want to give it a hefty bonus for    // inlining, but cap that bonus in the event that inlining wouldn't pan    // out. Pretend to inline the function, with a custom threshold. -  CallAnalyzer CA(DL, TTI, AC, *F, InlineConstants::IndirectCallThreshold); +  CallAnalyzer CA(DL, TTI, ACT, *F, InlineConstants::IndirectCallThreshold);    if (CA.analyzeCall(CS)) {      // We were able to inline the indirect call! Subtract the cost from the      // bonus we want to apply, but don't go below zero. @@ -1110,7 +1110,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {    // the ephemeral values multiple times (and they're completely determined by    // the callee, so this is purely duplicate work).    SmallPtrSet<const Value *, 32> EphValues; -  CodeMetrics::collectEphemeralValues(&F, &AC, EphValues); +  CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F), EphValues);    // The worklist of live basic blocks in the callee *after* inlining. 
We avoid    // adding basic blocks of the callee which can be proven to be dead for this @@ -1310,7 +1310,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,          << "...\n");    CallAnalyzer CA(Callee->getDataLayout(), *TTI, -                  ACT->getAssumptionCache(*Callee), *Callee, Threshold); +                  ACT, *Callee, Threshold);    bool ShouldInline = CA.analyzeCall(CS);    DEBUG(CA.dump()); diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp index 085ce920139b..ff8955870cb5 100644 --- a/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -623,8 +623,8 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {      N.TBAA = getMetadata(LLVMContext::MD_tbaa);    if (Merge) -    N.Scope = -        MDNode::intersect(N.Scope, getMetadata(LLVMContext::MD_alias_scope)); +    N.Scope = MDNode::getMostGenericAliasScope( +        N.Scope, getMetadata(LLVMContext::MD_alias_scope));    else      N.Scope = getMetadata(LLVMContext::MD_alias_scope); diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp index 9b3acb5ca0a9..868fbf010db3 100644 --- a/lib/Bitcode/Reader/BitReader.cpp +++ b/lib/Bitcode/Reader/BitReader.cpp @@ -9,9 +9,11 @@  #include "llvm-c/BitReader.h"  #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/DiagnosticPrinter.h"  #include "llvm/IR/LLVMContext.h"  #include "llvm/IR/Module.h"  #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h"  #include <cstring>  #include <string> @@ -30,11 +32,20 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,                                     LLVMMemoryBufferRef MemBuf,                                     LLVMModuleRef *OutModule,                                     char **OutMessage) { -  ErrorOr<Module *> ModuleOrErr = -      parseBitcodeFile(unwrap(MemBuf)->getMemBufferRef(), *unwrap(ContextRef)); -  if (std::error_code EC = ModuleOrErr.getError()) { -    if (OutMessage) -      *OutMessage = strdup(EC.message().c_str()); +  MemoryBufferRef Buf = unwrap(MemBuf)->getMemBufferRef(); +  LLVMContext &Ctx = *unwrap(ContextRef); + +  std::string Message; +  raw_string_ostream Stream(Message); +  DiagnosticPrinterRawOStream DP(Stream); + +  ErrorOr<Module *> ModuleOrErr = parseBitcodeFile( +      Buf, Ctx, [&](const DiagnosticInfo &DI) { DI.print(DP); }); +  if (ModuleOrErr.getError()) { +    if (OutMessage) { +      Stream.flush(); +      *OutMessage = strdup(Message.c_str()); +    }      *OutModule = wrap((Module*)nullptr);      return 1;    } diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 1bc86f6c222a..f112120c1abc 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -121,8 +121,7 @@ computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,        for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) {          int TypeID = TypeIds[J];          assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); -        int ValueForTypeID = -            isFilterEHSelector(TypeID) ? FilterOffsets[-1 - TypeID] : TypeID; +        int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;          unsigned SizeTypeID = getSLEB128Size(ValueForTypeID);          int NextAction = SizeAction ? 
-(SizeAction + SizeTypeID) : 0; @@ -270,14 +269,14 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,          CallSiteEntry Site = {            BeginLabel,            LastLabel, -          LandingPad, +          LandingPad->LandingPadLabel,            FirstActions[P.PadIndex]          };          // Try to merge with the previous call-site. SJLJ doesn't do this          if (PreviousIsInvoke && !IsSJLJ) {            CallSiteEntry &Prev = CallSites.back(); -          if (Site.LPad == Prev.LPad && Site.Action == Prev.Action) { +          if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {              // Extend the range of the previous entry.              Prev.EndLabel = Site.EndLabel;              continue; @@ -577,15 +576,15 @@ void EHStreamer::emitExceptionTable() {        // Offset of the landing pad, counted in 16-byte bundles relative to the        // @LPStart address. -      if (!S.LPad) { +      if (!S.PadLabel) {          if (VerboseAsm)            Asm->OutStreamer.AddComment("    has no landing pad");          Asm->OutStreamer.EmitIntValue(0, 4/*size*/);        } else {          if (VerboseAsm)            Asm->OutStreamer.AddComment(Twine("    jumps to ") + -                                      S.LPad->LandingPadLabel->getName()); -        Asm->EmitLabelDifference(S.LPad->LandingPadLabel, EHFuncBeginSym, 4); +                                      S.PadLabel->getName()); +        Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4);        }        // Offset of the first associated action record, relative to the start of @@ -682,7 +681,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {      unsigned TypeID = *I;      if (VerboseAsm) {        --Entry; -      if (isFilterEHSelector(TypeID)) +      if (TypeID != 0)          Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry));      } diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h index 9b316ff00e9b..e93055ce8655 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -23,8 +23,6 @@ class MachineModuleInfo;  class MachineInstr;  class MachineFunction;  class AsmPrinter; -class MCSymbol; -class MCSymbolRefExpr;  template <typename T>  class SmallVectorImpl; @@ -62,11 +60,11 @@ protected:    /// Structure describing an entry in the call-site table.    struct CallSiteEntry {      // The 'try-range' is BeginLabel .. EndLabel. -    MCSymbol *BeginLabel; // Null indicates the start of the function. -    MCSymbol *EndLabel;   // Null indicates the end of the function. +    MCSymbol *BeginLabel; // zero indicates the start of the function. +    MCSymbol *EndLabel;   // zero indicates the end of the function. -    // LPad contains the landing pad start labels. -    const LandingPadInfo *LPad; // Null indicates that there is no landing pad. +    // The landing pad starts at PadLabel. +    MCSymbol *PadLabel;   // zero indicates that there is no landing pad.      unsigned Action;    }; @@ -114,13 +112,6 @@ protected:    virtual void emitTypeInfos(unsigned TTypeEncoding); -  // Helpers for for identifying what kind of clause an EH typeid or selector -  // corresponds to. Negative selectors are for filter clauses, the zero -  // selector is for cleanups, and positive selectors are for catch clauses. 
-  static bool isFilterEHSelector(int Selector) { return Selector < 0; } -  static bool isCleanupEHSelector(int Selector) { return Selector == 0; } -  static bool isCatchEHSelector(int Selector) { return Selector > 0; } -  public:    EHStreamer(AsmPrinter *A);    virtual ~EHStreamer(); diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp index 2138cb9514f8..0f0ad755835d 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -99,156 +99,9 @@ void Win64Exception::endFunction(const MachineFunction *) {    if (shouldEmitPersonality) {      Asm->OutStreamer.PushSection(); - -    // Emit an UNWIND_INFO struct describing the prologue.      Asm->OutStreamer.EmitWinEHHandlerData(); - -    // Emit either MSVC-compatible tables or the usual Itanium-style LSDA after -    // the UNWIND_INFO struct. -    if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::MSVC) { -      const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; -      if (Per->getName() == "__C_specific_handler") -        emitCSpecificHandlerTable(); -      else -        report_fatal_error(Twine("unexpected personality function: ") + -                           Per->getName()); -    } else { -      emitExceptionTable(); -    } - +    emitExceptionTable();      Asm->OutStreamer.PopSection();    }    Asm->OutStreamer.EmitWinCFIEndProc();  } - -const MCSymbolRefExpr *Win64Exception::createImageRel32(const MCSymbol *Value) { -  return MCSymbolRefExpr::Create(Value, MCSymbolRefExpr::VK_COFF_IMGREL32, -                                 Asm->OutContext); -} - -/// Emit the language-specific data that __C_specific_handler expects.  This -/// handler lives in the x64 Microsoft C runtime and allows catching or cleaning -/// up after faults with __try, __except, and __finally.  The typeinfo values -/// are not really RTTI data, but pointers to filter functions that return an -/// integer (1, 0, or -1) indicating how to handle the exception. For __finally -/// blocks and other cleanups, the landing pad label is zero, and the filter -/// function is actually a cleanup handler with the same prototype.  A catch-all -/// entry is modeled with a null filter function field and a non-zero landing -/// pad label. -/// -/// Possible filter function return values: -///   EXCEPTION_EXECUTE_HANDLER (1): -///     Jump to the landing pad label after cleanups. -///   EXCEPTION_CONTINUE_SEARCH (0): -///     Continue searching this table or continue unwinding. -///   EXCEPTION_CONTINUE_EXECUTION (-1): -///     Resume execution at the trapping PC. -/// -/// Inferred table structure: -///   struct Table { -///     int NumEntries; -///     struct Entry { -///       imagerel32 LabelStart; -///       imagerel32 LabelEnd; -///       imagerel32 FilterOrFinally;  // Zero means catch-all. -///       imagerel32 LabelLPad;        // Zero means __finally. -///     } Entries[NumEntries]; -///   }; -void Win64Exception::emitCSpecificHandlerTable() { -  const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); - -  // Simplifying assumptions for first implementation: -  // - Cleanups are not implemented. -  // - Filters are not implemented. - -  // The Itanium LSDA table sorts similar landing pads together to simplify the -  // actions table, but we don't need that. 
-  SmallVector<const LandingPadInfo *, 64> LandingPads; -  LandingPads.reserve(PadInfos.size()); -  for (const auto &LP : PadInfos) -    LandingPads.push_back(&LP); - -  // Compute label ranges for call sites as we would for the Itanium LSDA, but -  // use an all zero action table because we aren't using these actions. -  SmallVector<unsigned, 64> FirstActions; -  FirstActions.resize(LandingPads.size()); -  SmallVector<CallSiteEntry, 64> CallSites; -  computeCallSiteTable(CallSites, LandingPads, FirstActions); - -  MCSymbol *EHFuncBeginSym = -      Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()); -  MCSymbol *EHFuncEndSym = -      Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber()); - -  // Emit the number of table entries. -  unsigned NumEntries = 0; -  for (const CallSiteEntry &CSE : CallSites) { -    if (!CSE.LPad) -      continue; // Ignore gaps. -    for (int Selector : CSE.LPad->TypeIds) { -      // Ignore C++ filter clauses in SEH. -      // FIXME: Implement cleanup clauses. -      if (isCatchEHSelector(Selector)) -        ++NumEntries; -    } -  } -  Asm->OutStreamer.EmitIntValue(NumEntries, 4); - -  // Emit the four-label records for each call site entry. The table has to be -  // sorted in layout order, and the call sites should already be sorted. -  for (const CallSiteEntry &CSE : CallSites) { -    // Ignore gaps. Unlike the Itanium model, unwinding through a frame without -    // an EH table entry will propagate the exception rather than terminating -    // the program. -    if (!CSE.LPad) -      continue; -    const LandingPadInfo *LPad = CSE.LPad; - -    // Compute the label range. We may reuse the function begin and end labels -    // rather than forming new ones. -    const MCExpr *Begin = -        createImageRel32(CSE.BeginLabel ? CSE.BeginLabel : EHFuncBeginSym); -    const MCExpr *End; -    if (CSE.EndLabel) { -      // The interval is half-open, so we have to add one to include the return -      // address of the last invoke in the range. -      End = MCBinaryExpr::CreateAdd(createImageRel32(CSE.EndLabel), -                                    MCConstantExpr::Create(1, Asm->OutContext), -                                    Asm->OutContext); -    } else { -      End = createImageRel32(EHFuncEndSym); -    } - -    // These aren't really type info globals, they are actually pointers to -    // filter functions ordered by selector. The zero selector is used for -    // cleanups, so slot zero corresponds to selector 1. -    const std::vector<const GlobalValue *> &SelectorToFilter = MMI->getTypeInfos(); - -    // Do a parallel iteration across typeids and clause labels, skipping filter -    // clauses. -    assert(LPad->TypeIds.size() == LPad->ClauseLabels.size()); -    for (size_t I = 0, E = LPad->TypeIds.size(); I < E; ++I) { -      // AddLandingPadInfo stores the clauses in reverse, but there is a FIXME -      // to change that. -      int Selector = LPad->TypeIds[E - I - 1]; -      MCSymbol *ClauseLabel = LPad->ClauseLabels[I]; - -      // Ignore C++ filter clauses in SEH. -      // FIXME: Implement cleanup clauses. -      if (!isCatchEHSelector(Selector)) -        continue; - -      Asm->OutStreamer.EmitValue(Begin, 4); -      Asm->OutStreamer.EmitValue(End, 4); -      if (isCatchEHSelector(Selector)) { -        assert(unsigned(Selector - 1) < SelectorToFilter.size()); -        const GlobalValue *TI = SelectorToFilter[Selector - 1]; -        if (TI) // Emit the filter function pointer. 
-          Asm->OutStreamer.EmitValue(createImageRel32(Asm->getSymbol(TI)), 4); -        else  // Otherwise, this is a "catch i8* null", or catch all. -          Asm->OutStreamer.EmitIntValue(0, 4); -      } -      Asm->OutStreamer.EmitValue(createImageRel32(ClauseLabel), 4); -    } -  } -} diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.h b/lib/CodeGen/AsmPrinter/Win64Exception.h index b2d5d1bce563..538e1328157f 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.h +++ b/lib/CodeGen/AsmPrinter/Win64Exception.h @@ -29,10 +29,6 @@ class Win64Exception : public EHStreamer {    /// Per-function flag to indicate if frame moves info should be emitted.    bool shouldEmitMoves; -  void emitCSpecificHandlerTable(); - -  const MCSymbolRefExpr *createImageRel32(const MCSymbol *Value); -  public:    //===--------------------------------------------------------------------===//    // Main entry points. diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 32d511285eb2..baad411e2c5b 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -452,14 +452,6 @@ void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {    LP.TypeIds.push_back(0);  } -MCSymbol * -MachineModuleInfo::addClauseForLandingPad(MachineBasicBlock *LandingPad) { -  MCSymbol *ClauseLabel = Context.CreateTempSymbol(); -  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); -  LP.ClauseLabels.push_back(ClauseLabel); -  return ClauseLabel; -} -  /// TidyLandingPads - Remap landing pad labels and remove any deleted landing  /// pads.  void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 28e9f847a9d7..e53e874cb178 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -449,9 +449,9 @@ void TargetPassConfig::addPassesToHandleExceptions() {    case ExceptionHandling::DwarfCFI:    case ExceptionHandling::ARM:    case ExceptionHandling::ItaniumWinEH: -  case ExceptionHandling::MSVC: // FIXME: Needs preparation.      addPass(createDwarfEHPass(TM));      break; +  case ExceptionHandling::MSVC: // FIXME: Add preparation.    case ExceptionHandling::None:      addPass(createLowerInvokePass()); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5145731f6231..1bd6cfff9d25 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6544,19 +6544,15 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {    // If the input is a constant, let getNode fold it.    if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { -    SDValue Res = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); -    if (Res.getNode() != N) { -      if (!LegalOperations || -          TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) -        return Res; - -      // Folding it resulted in an illegal node, and it's too late to -      // do that. Clean up the old node and forego the transformation. -      // Ideally this won't happen very often, because instcombine -      // and the earlier dagcombine runs (where illegal nodes are -      // permitted) should have folded most of them already. -      deleteAndRecombine(Res.getNode()); -    } +    // If we can't allow illegal operations, we need to check that this is just +    // a fp -> int or int -> conversion and that the resulting operation will +    // be legal. 
+    if (!LegalOperations || +        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() && +         TLI.isOperationLegal(ISD::ConstantFP, VT)) || +        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() && +         TLI.isOperationLegal(ISD::Constant, VT))) +      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0);    }    // (conv (conv x, t1), t2) -> (conv x, t2) diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index eac404c50365..3a8c276e2618 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -390,7 +390,8 @@ SDValue VectorLegalizer::Promote(SDValue Op) {        if (Op.getOperand(j)                .getValueType()                .getVectorElementType() -              .isFloatingPoint()) +              .isFloatingPoint() && +          NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())          Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j));        else          Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); @@ -399,8 +400,9 @@ SDValue VectorLegalizer::Promote(SDValue Op) {    }    Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands); -  if (VT.isFloatingPoint() || -      (VT.isVector() && VT.getVectorElementType().isFloatingPoint())) +  if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || +      (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && +       NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))      return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0));    else      return DAG.getNode(ISD::BITCAST, dl, VT, Op); @@ -554,9 +556,9 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {        BitOffset += SrcEltBits;        if (BitOffset >= WideBits) {          WideIdx++; -        Offset -= WideBits; -        if (Offset > 0) { -          ShAmt = DAG.getConstant(SrcEltBits - Offset, +        BitOffset -= WideBits; +        if (BitOffset > 0) { +          ShAmt = DAG.getConstant(SrcEltBits - BitOffset,                                    TLI.getShiftAmountTy(WideVT));            Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);            Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 86a63eea7c2a..151bc724df67 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2071,14 +2071,10 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {    // Get the two live-in registers as SDValues. The physregs have already been    // copied into virtual registers.    
SDValue Ops[2]; -  if (FuncInfo.ExceptionPointerVirtReg) { -    Ops[0] = DAG.getZExtOrTrunc( -        DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), -                           FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), -        getCurSDLoc(), ValueVTs[0]); -  } else { -    Ops[0] = DAG.getConstant(0, TLI.getPointerTy()); -  } +  Ops[0] = DAG.getZExtOrTrunc( +      DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), +                         FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), +      getCurSDLoc(), ValueVTs[0]);    Ops[1] = DAG.getZExtOrTrunc(        DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),                           FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), @@ -2090,27 +2086,6 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {    setValue(&LP, Res);  } -unsigned -SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV, -                                             MachineBasicBlock *LPadBB) { -  SDValue Chain = getControlRoot(); - -  // Get the typeid that we will dispatch on later. -  const TargetLowering &TLI = DAG.getTargetLoweringInfo(); -  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy()); -  unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); -  unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV); -  SDValue Sel = DAG.getConstant(TypeID, TLI.getPointerTy()); -  Chain = DAG.getCopyToReg(Chain, getCurSDLoc(), VReg, Sel); - -  // Branch to the main landing pad block. -  MachineBasicBlock *ClauseMBB = FuncInfo.MBB; -  ClauseMBB->addSuccessor(LPadBB); -  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, Chain, -                          DAG.getBasicBlock(LPadBB))); -  return VReg; -} -  /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for  /// small case ranges).  bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index eba98b8086b7..9070091d77b8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -713,8 +713,6 @@ public:    void visitJumpTable(JumpTable &JT);    void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,                              MachineBasicBlock *SwitchBB); -  unsigned visitLandingPadClauseBB(GlobalValue *ClauseGV, -                                   MachineBasicBlock *LPadMBB);  private:    // These all get lowered before this pass. 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 4f031d3ff7e7..ef5452554f72 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -19,7 +19,6 @@  #include "llvm/Analysis/AliasAnalysis.h"  #include "llvm/Analysis/BranchProbabilityInfo.h"  #include "llvm/Analysis/CFG.h" -#include "llvm/CodeGen/Analysis.h"  #include "llvm/CodeGen/FastISel.h"  #include "llvm/CodeGen/FunctionLoweringInfo.h"  #include "llvm/CodeGen/GCMetadata.h" @@ -41,7 +40,6 @@  #include "llvm/IR/Intrinsics.h"  #include "llvm/IR/LLVMContext.h"  #include "llvm/IR/Module.h" -#include "llvm/MC/MCAsmInfo.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h" @@ -909,8 +907,6 @@ void SelectionDAGISel::DoInstructionSelection() {  void SelectionDAGISel::PrepareEHLandingPad() {    MachineBasicBlock *MBB = FuncInfo->MBB; -  const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); -    // Add a label to mark the beginning of the landing pad.  Deletion of the    // landing pad can thus be detected via the MachineModuleInfo.    MCSymbol *Label = MF->getMMI().addLandingPad(MBB); @@ -922,66 +918,8 @@ void SelectionDAGISel::PrepareEHLandingPad() {    BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)      .addSym(Label); -  if (TM.getMCAsmInfo()->getExceptionHandlingType() == -      ExceptionHandling::MSVC) { -    // Make virtual registers and a series of labels that fill in values for the -    // clauses. -    auto &RI = MF->getRegInfo(); -    FuncInfo->ExceptionSelectorVirtReg = RI.createVirtualRegister(PtrRC); - -    // Get all invoke BBs that will unwind into the clause BBs. -    SmallVector<MachineBasicBlock *, 4> InvokeBBs(MBB->pred_begin(), -                                                  MBB->pred_end()); - -    // Emit separate machine basic blocks with separate labels for each clause -    // before the main landing pad block. -    const BasicBlock *LLVMBB = MBB->getBasicBlock(); -    const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst(); -    MachineInstrBuilder SelectorPHI = BuildMI( -        *MBB, MBB->begin(), SDB->getCurDebugLoc(), TII->get(TargetOpcode::PHI), -        FuncInfo->ExceptionSelectorVirtReg); -    for (unsigned I = 0, E = LPadInst->getNumClauses(); I != E; ++I) { -      MachineBasicBlock *ClauseBB = MF->CreateMachineBasicBlock(LLVMBB); -      MF->insert(MBB, ClauseBB); - -      // Add the edge from the invoke to the clause. -      for (MachineBasicBlock *InvokeBB : InvokeBBs) -        InvokeBB->addSuccessor(ClauseBB); - -      // Mark the clause as a landing pad or MI passes will delete it. -      ClauseBB->setIsLandingPad(); - -      GlobalValue *ClauseGV = ExtractTypeInfo(LPadInst->getClause(I)); - -      // Start the BB with a label. -      MCSymbol *ClauseLabel = MF->getMMI().addClauseForLandingPad(MBB); -      BuildMI(*ClauseBB, ClauseBB->begin(), SDB->getCurDebugLoc(), II) -          .addSym(ClauseLabel); - -      // Construct a simple BB that defines a register with the typeid constant. -      FuncInfo->MBB = ClauseBB; -      FuncInfo->InsertPt = ClauseBB->end(); -      unsigned VReg = SDB->visitLandingPadClauseBB(ClauseGV, MBB); -      CurDAG->setRoot(SDB->getRoot()); -      SDB->clear(); -      CodeGenAndEmitDAG(); - -      // Add the typeid virtual register to the phi in the main landing pad. 
-      SelectorPHI.addReg(VReg).addMBB(ClauseBB); -    } - -    // Remove the edge from the invoke to the lpad. -    for (MachineBasicBlock *InvokeBB : InvokeBBs) -      InvokeBB->removeSuccessor(MBB); - -    // Restore FuncInfo back to its previous state and select the main landing -    // pad block. -    FuncInfo->MBB = MBB; -    FuncInfo->InsertPt = MBB->end(); -    return; -  } -    // Mark exception register as live in. +  const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy());    if (unsigned Reg = TLI->getExceptionPointerRegister())      FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index 290dbe29c707..71b43942e882 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -525,12 +525,15 @@ bool DISubprogram::Verify() const {          while ((IA = DL.getInlinedAt()))            DL = DebugLoc::getFromDILocation(IA);          DL.getScopeAndInlinedAt(Scope, IA); +        if (!Scope) +          return false;          assert(!IA);          while (!DIDescriptor(Scope).isSubprogram()) {            DILexicalBlockFile D(Scope);            Scope = D.isLexicalBlockFile()                        ? D.getScope()                        : DebugLoc::getFromDILexicalBlock(Scope).getScope(); +          assert(Scope && "lexical block file has no scope");          }          if (!DISubprogram(Scope).describes(F))            return false; diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp index 2c6b332dc8ee..63e5730f954e 100644 --- a/lib/IR/Metadata.cpp +++ b/lib/IR/Metadata.cpp @@ -826,6 +826,28 @@ MDNode *MDNode::intersect(MDNode *A, MDNode *B) {    return getOrSelfReference(A->getContext(), MDs);  } +MDNode *MDNode::getMostGenericAliasScope(MDNode *A, MDNode *B) { +  if (!A || !B) +    return nullptr; + +  SmallVector<Metadata *, 4> MDs(B->op_begin(), B->op_end()); +  for (unsigned i = 0, ie = A->getNumOperands(); i != ie; ++i) { +    Metadata *MD = A->getOperand(i); +    bool insert = true; +    for (unsigned j = 0, je = B->getNumOperands(); j != je; ++j) +      if (MD == B->getOperand(j)) { +        insert = false; +        break; +      } +    if (insert) +        MDs.push_back(MD); +  } + +  // FIXME: This preserves long-standing behaviour, but is it really the right +  // behaviour?  Or was that an unintended side-effect of node uniquing? 
+  return getOrSelfReference(A->getContext(), MDs); +} +  MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) {    if (!A || !B)      return nullptr; diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp index 889705e95fc2..65060dc39d27 100644 --- a/lib/IR/Type.cpp +++ b/lib/IR/Type.cpp @@ -708,9 +708,10 @@ VectorType::VectorType(Type *ElType, unsigned NumEl)  VectorType *VectorType::get(Type *elementType, unsigned NumElements) {    Type *ElementType = const_cast<Type*>(elementType);    assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0"); -  assert(isValidElementType(ElementType) && -         "Elements of a VectorType must be a primitive type"); -   +  assert(isValidElementType(ElementType) && "Element type of a VectorType must " +                                            "be an integer, floating point, or " +                                            "pointer type."); +    LLVMContextImpl *pImpl = ElementType->getContext().pImpl;    VectorType *&Entry = ElementType->getContext().pImpl      ->VectorTypes[std::make_pair(ElementType, NumElements)]; diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp index e95845f0af01..4d6298c542e2 100644 --- a/lib/MC/MCSectionCOFF.cpp +++ b/lib/MC/MCSectionCOFF.cpp @@ -47,6 +47,10 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,    }    OS << "\t.section\t" << getSectionName() << ",\""; +  if (getCharacteristics() & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA) +    OS << 'd'; +  if (getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) +    OS << 'b';    if (getCharacteristics() & COFF::IMAGE_SCN_MEM_EXECUTE)      OS << 'x';    if (getCharacteristics() & COFF::IMAGE_SCN_MEM_WRITE) @@ -55,10 +59,6 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,      OS << 'r';    else      OS << 'y'; -  if (getCharacteristics() & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA) -    OS << 'd'; -  if (getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) -    OS << 'b';    if (getCharacteristics() & COFF::IMAGE_SCN_LNK_REMOVE)      OS << 'n';    if (getCharacteristics() & COFF::IMAGE_SCN_MEM_SHARED) diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index c17f99b9bd7b..ec0e0f7256a4 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -710,17 +710,22 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,      CrossSection = &Symbol.getSection() != &B->getSection();      // Offset of the symbol in the section -    int64_t a = Layout.getSymbolOffset(&B_SD); +    int64_t OffsetOfB = Layout.getSymbolOffset(&B_SD); -    // Offset of the relocation in the section -    int64_t b = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); - -    FixedValue = b - a;      // In the case where we have SymbA and SymB, we just need to store the delta      // between the two symbols.  Update FixedValue to account for the delta, and      // skip recording the relocation. 
-    if (!CrossSection) +    if (!CrossSection) { +      int64_t OffsetOfA = Layout.getSymbolOffset(&A_SD); +      FixedValue = (OffsetOfA - OffsetOfB) + Target.getConstant();        return; +    } + +    // Offset of the relocation in the section +    int64_t OffsetOfRelocation = +        Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); + +    FixedValue = OffsetOfRelocation - OffsetOfB;    } else {      FixedValue = Target.getConstant();    } diff --git a/lib/Support/regcomp.c b/lib/Support/regcomp.c index 0b5b765f89e1..b79692966473 100644 --- a/lib/Support/regcomp.c +++ b/lib/Support/regcomp.c @@ -49,6 +49,14 @@  #include "regcclass.h"  #include "regcname.h" +#include "llvm/Config/config.h" +#if HAVE_STDINT_H +#include <stdint.h> +#else +/* Pessimistically bound memory use */ +#define SIZE_MAX UINT_MAX +#endif +  /*   * parse structure, passed up and down to avoid global variables and   * other clumsinesses @@ -1069,6 +1077,8 @@ allocset(struct parse *p)  		p->ncsalloc += CHAR_BIT;  		nc = p->ncsalloc; +		if (nc > SIZE_MAX / sizeof(cset)) +			goto nomem;  		assert(nc % CHAR_BIT == 0);  		nbytes = nc / CHAR_BIT * css; @@ -1412,6 +1422,11 @@ enlarge(struct parse *p, sopno size)  	if (p->ssize >= size)  		return; +	if ((unsigned long)size > SIZE_MAX / sizeof(sop)) { +		SETERROR(REG_ESPACE); +		return; +	} +  	sp = (sop *)realloc(p->strip, size*sizeof(sop));  	if (sp == NULL) {  		SETERROR(REG_ESPACE); @@ -1428,6 +1443,12 @@ static void  stripsnug(struct parse *p, struct re_guts *g)  {  	g->nstates = p->slen; +	if ((unsigned long)p->slen > SIZE_MAX / sizeof(sop)) { +		g->strip = p->strip; +		SETERROR(REG_ESPACE); +		return; +	} +  	g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));  	if (g->strip == NULL) {  		SETERROR(REG_ESPACE); diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 19f51ce9450c..399b5eeaf5f5 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -6287,6 +6287,8 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,                                      AArch64CC::CondCode CC, bool NoNans, EVT VT,                                      SDLoc dl, SelectionDAG &DAG) {    EVT SrcVT = LHS.getValueType(); +  assert(VT.getSizeInBits() == SrcVT.getSizeInBits() && +         "function only supposed to emit natural comparisons");    BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());    APInt CnstBits(VT.getSizeInBits(), 0); @@ -6381,13 +6383,15 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();    SDValue LHS = Op.getOperand(0);    SDValue RHS = Op.getOperand(1); +  EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();    SDLoc dl(Op);    if (LHS.getValueType().getVectorElementType().isInteger()) {      assert(LHS.getValueType() == RHS.getValueType());      AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC); -    return EmitVectorComparison(LHS, RHS, AArch64CC, false, Op.getValueType(), -                                dl, DAG); +    SDValue Cmp = +        EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG); +    return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());    }    assert(LHS.getValueType().getVectorElementType() == MVT::f32 || @@ -6401,19 +6405,21 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,    bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;    SDValue Cmp = -      EmitVectorComparison(LHS, 
RHS, CC1, NoNaNs, Op.getValueType(), dl, DAG); +      EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);    if (!Cmp.getNode())      return SDValue();    if (CC2 != AArch64CC::AL) {      SDValue Cmp2 = -        EmitVectorComparison(LHS, RHS, CC2, NoNaNs, Op.getValueType(), dl, DAG); +        EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);      if (!Cmp2.getNode())        return SDValue(); -    Cmp = DAG.getNode(ISD::OR, dl, Cmp.getValueType(), Cmp, Cmp2); +    Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);    } +  Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType()); +    if (ShouldInvert)      return Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType()); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index e0397cce962d..c12442255a01 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2400,7 +2400,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,    else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {      // Conservatively refuse to convert an instruction which isn't in the same      // BB as the comparison. -    // For CMPri, we need to check Sub, thus we can't return here. +    // For CMPri w/ CmpValue != 0, a Sub may still be a candidate. +    // Thus we cannot return here.      if (CmpInstr->getOpcode() == ARM::CMPri ||         CmpInstr->getOpcode() == ARM::t2CMPri)        MI = nullptr; @@ -2479,8 +2480,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,    case ARM::t2EORrr:    case ARM::t2EORri: {      // Scan forward for the use of CPSR -    // When checking against MI: if it's a conditional code requires -    // checking of V bit, then this is not safe to do. +    // When checking against MI: if it's a conditional code that requires +    // checking of the V bit or C bit, then this is not safe to do.      // It is safe to remove CmpInstr if CPSR is redefined or killed.      // If we are done with the basic block, we need to check whether CPSR is      // live-out. @@ -2547,19 +2548,30 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,              OperandsToUpdate.push_back(                  std::make_pair(&((*I).getOperand(IO - 1)), NewCC));            } -        } else +        } else { +          // No Sub, so this is x = <op> y, z; cmp x, 0.            switch (CC) { -          default: +          case ARMCC::EQ: // Z +          case ARMCC::NE: // Z +          case ARMCC::MI: // N +          case ARMCC::PL: // N +          case ARMCC::AL: // none              // CPSR can be used multiple times, we should continue.              break; -          case ARMCC::VS: -          case ARMCC::VC: -          case ARMCC::GE: -          case ARMCC::LT: -          case ARMCC::GT: -          case ARMCC::LE: +          case ARMCC::HS: // C +          case ARMCC::LO: // C +          case ARMCC::VS: // V +          case ARMCC::VC: // V +          case ARMCC::HI: // C Z +          case ARMCC::LS: // C Z +          case ARMCC::GE: // N V +          case ARMCC::LT: // N V +          case ARMCC::GT: // Z N V +          case ARMCC::LE: // Z N V +            // The instruction uses the V bit or C bit which is not safe.              
return false;            } +        }        }      } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index f92f257cd7eb..a1de5efb4507 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -565,7 +565,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)      setTargetDAGCombine(ISD::FP_TO_SINT);      setTargetDAGCombine(ISD::FP_TO_UINT);      setTargetDAGCombine(ISD::FDIV); -    setTargetDAGCombine(ISD::LOAD);      // It is legal to extload from v4i8 to v4i16 or v4i32.      MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8, @@ -4488,6 +4487,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {    SDValue Op0 = Op.getOperand(0);    SDValue Op1 = Op.getOperand(1);    SDValue CC = Op.getOperand(2); +  EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();    EVT VT = Op.getValueType();    ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();    SDLoc dl(Op); @@ -4517,8 +4517,8 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {        TmpOp0 = Op0;        TmpOp1 = Op1;        Opc = ISD::OR; -      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); -      Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); +      Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); +      Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);        break;      case ISD::SETUO: Invert = true; // Fallthrough      case ISD::SETO: @@ -4526,8 +4526,8 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {        TmpOp0 = Op0;        TmpOp1 = Op1;        Opc = ISD::OR; -      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); -      Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); +      Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); +      Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);        break;      }    } else { @@ -4561,8 +4561,8 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {        if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {          Opc = ARMISD::VTST; -        Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0)); -        Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1)); +        Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0)); +        Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));          Invert = !Invert;        }      } @@ -4588,22 +4588,24 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {    if (SingleOp.getNode()) {      switch (Opc) {      case ARMISD::VCEQ: -      Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break; +      Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;      case ARMISD::VCGE: -      Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break; +      Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;      case ARMISD::VCLEZ: -      Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break; +      Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;      case ARMISD::VCGT: -      Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break; +      Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;      case ARMISD::VCLTZ: -      Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break; +      Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;      default: -      Result = DAG.getNode(Opc, dl, VT, Op0, Op1); +      Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);      }    } else { -     Result = 
DAG.getNode(Opc, dl, VT, Op0, Op1); +     Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);    } +  Result = DAG.getSExtOrTrunc(Result, dl, VT); +    if (Invert)      Result = DAG.getNOT(dl, Result, VT); @@ -8877,18 +8879,17 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {                                DAG.getUNDEF(VT), NewMask.data());  } -/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, -/// NEON load/store intrinsics, and generic vector load/stores, to merge -/// base address updates. -/// For generic load/stores, the memory type is assumed to be a vector. -/// The caller is assumed to have checked legality. +/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and +/// NEON load/store intrinsics to merge base address updates.  static SDValue CombineBaseUpdate(SDNode *N,                                   TargetLowering::DAGCombinerInfo &DCI) { +  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) +    return SDValue(); +    SelectionDAG &DAG = DCI.DAG;    bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||                        N->getOpcode() == ISD::INTRINSIC_W_CHAIN); -  bool isStore = N->getOpcode() == ISD::STORE; -  unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1); +  unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);    SDValue Addr = N->getOperand(AddrOpIdx);    // Search for a use of the address operand that is an increment. @@ -8949,10 +8950,6 @@ static SDValue CombineBaseUpdate(SDNode *N,        case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;        case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;        case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; -      case ISD::LOAD:       NewOpc = ARMISD::VLD1_UPD; -        NumVecs = 1; isLaneOp = false; break; -      case ISD::STORE:      NewOpc = ARMISD::VST1_UPD; -        NumVecs = 1; isLoad = false; isLaneOp = false; break;        }      } @@ -8960,11 +8957,8 @@ static SDValue CombineBaseUpdate(SDNode *N,      EVT VecTy;      if (isLoad)        VecTy = N->getValueType(0); -    else if (isIntrinsic) -      VecTy = N->getOperand(AddrOpIdx+1).getValueType();      else -      VecTy = N->getOperand(1).getValueType(); - +      VecTy = N->getOperand(AddrOpIdx+1).getValueType();      unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;      if (isLaneOp)        NumBytes /= VecTy.getVectorNumElements(); @@ -8981,70 +8975,25 @@ static SDValue CombineBaseUpdate(SDNode *N,        continue;      } -    EVT AlignedVecTy = VecTy; - -    // If this is a less-than-standard-aligned load/store, change the type to -    // match the standard alignment. -    // The alignment is overlooked when selecting _UPD variants; and it's -    // easier to introduce bitcasts here than fix that. -    // There are 3 ways to get to this base-update combine: -    // - intrinsics: they are assumed to be properly aligned (to the standard -    //   alignment of the memory type), so we don't need to do anything. -    // - ARMISD::VLDx nodes: they are only generated from the aforementioned -    //   intrinsics, so, likewise, there's nothing to do. -    // - generic load/store instructions: the alignment is specified as an -    //   explicit operand, rather than implicitly as the standard alignment -    //   of the memory type (like the intrisics).  We need to change the -    //   memory type to match the explicit alignment.  That way, we don't -    //   generate non-standard-aligned ARMISD::VLDx nodes. 
-    if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N)) { -      unsigned Alignment = LSN->getAlignment(); -      if (Alignment == 0) -        Alignment = 1; -      if (Alignment < VecTy.getScalarSizeInBits() / 8) { -        MVT EltTy = MVT::getIntegerVT(Alignment * 8); -        assert(NumVecs == 1 && "Unexpected multi-element generic load/store."); -        assert(!isLaneOp && "Unexpected generic load/store lane."); -        unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8); -        AlignedVecTy = MVT::getVectorVT(EltTy, NumElts); -      } -    } -      // Create the new updating load/store node. -    // First, create an SDVTList for the new updating node's results.      EVT Tys[6];      unsigned NumResultVecs = (isLoad ? NumVecs : 0);      unsigned n;      for (n = 0; n < NumResultVecs; ++n) -      Tys[n] = AlignedVecTy; +      Tys[n] = VecTy;      Tys[n++] = MVT::i32;      Tys[n] = MVT::Other;      SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2)); - -    // Then, gather the new node's operands.      SmallVector<SDValue, 8> Ops;      Ops.push_back(N->getOperand(0)); // incoming chain      Ops.push_back(N->getOperand(AddrOpIdx));      Ops.push_back(Inc); -    if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) { -      // Try to match the intrinsic's signature -      Ops.push_back(StN->getValue()); -      Ops.push_back(DAG.getConstant(StN->getAlignment(), MVT::i32)); -    } else { -      for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) -        Ops.push_back(N->getOperand(i)); -    } - -    // If this is a non-standard-aligned store, the penultimate operand is the -    // stored value.  Bitcast it to the aligned type. -    if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) { -      SDValue &StVal = Ops[Ops.size()-2]; -      StVal = DAG.getNode(ISD::BITCAST, SDLoc(N), AlignedVecTy, StVal); +    for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) { +      Ops.push_back(N->getOperand(i));      } - -    MemSDNode *MemInt = cast<MemSDNode>(N); +    MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);      SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, -                                           Ops, AlignedVecTy, +                                           Ops, MemInt->getMemoryVT(),                                             MemInt->getMemOperand());      // Update the uses. @@ -9052,14 +9001,6 @@ static SDValue CombineBaseUpdate(SDNode *N,      for (unsigned i = 0; i < NumResultVecs; ++i) {        NewResults.push_back(SDValue(UpdN.getNode(), i));      } - -    // If this is an non-standard-aligned load, the first result is the loaded -    // value.  Bitcast it to the expected result type. 
-    if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) { -      SDValue &LdVal = NewResults[0]; -      LdVal = DAG.getNode(ISD::BITCAST, SDLoc(N), VecTy, LdVal); -    } -      NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain      DCI.CombineTo(N, NewResults);      DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); @@ -9069,14 +9010,6 @@ static SDValue CombineBaseUpdate(SDNode *N,    return SDValue();  } -static SDValue PerformVLDCombine(SDNode *N, -                                 TargetLowering::DAGCombinerInfo &DCI) { -  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) -    return SDValue(); - -  return CombineBaseUpdate(N, DCI); -} -  /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a  /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic  /// are also VDUPLANEs.  If so, combine them to a vldN-dup operation and @@ -9190,18 +9123,6 @@ static SDValue PerformVDUPLANECombine(SDNode *N,    return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);  } -static SDValue PerformLOADCombine(SDNode *N, -                                  TargetLowering::DAGCombinerInfo &DCI) { -  EVT VT = N->getValueType(0); - -  // If this is a legal vector load, try to combine it into a VLD1_UPD. -  if (ISD::isNormalLoad(N) && VT.isVector() && -      DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT)) -    return CombineBaseUpdate(N, DCI); - -  return SDValue(); -} -  /// PerformSTORECombine - Target-specific dag combine xforms for  /// ISD::STORE.  static SDValue PerformSTORECombine(SDNode *N, @@ -9340,11 +9261,6 @@ static SDValue PerformSTORECombine(SDNode *N,                          St->getAAInfo());    } -  // If this is a legal vector store, try to combine it into a VST1_UPD. -  if (ISD::isNormalStore(N) && VT.isVector() && -      DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT)) -    return CombineBaseUpdate(N, DCI); -    return SDValue();  } @@ -9938,11 +9854,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,    case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);    case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);    case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG); -  case ISD::LOAD:       return PerformLOADCombine(N, DCI);    case ARMISD::VLD2DUP:    case ARMISD::VLD3DUP:    case ARMISD::VLD4DUP: -    return PerformVLDCombine(N, DCI); +    return CombineBaseUpdate(N, DCI);    case ARMISD::BUILD_VECTOR:      return PerformARMBUILD_VECTORCombine(N, DCI);    case ISD::INTRINSIC_VOID: @@ -9962,7 +9877,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,      case Intrinsic::arm_neon_vst2lane:      case Intrinsic::arm_neon_vst3lane:      case Intrinsic::arm_neon_vst4lane: -      return PerformVLDCombine(N, DCI); +      return CombineBaseUpdate(N, DCI);      default: break;      }      break; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 96f3b4e64326..56de9d2f470c 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -9195,34 +9195,48 @@ static const struct {    const uint64_t Enabled;    const uint64_t Disabled;  } FPUs[] = { -    {ARM::VFP, ARM::FeatureVFP2, ARM::FeatureNEON}, -    {ARM::VFPV2, ARM::FeatureVFP2, ARM::FeatureNEON}, -    {ARM::VFPV3, ARM::FeatureVFP2 | ARM::FeatureVFP3, ARM::FeatureNEON}, -    {ARM::VFPV3_D16, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureD16, -     ARM::FeatureNEON}, -    {ARM::VFPV4, 
ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4, -     ARM::FeatureNEON}, -    {ARM::VFPV4_D16, -     ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | ARM::FeatureD16, -     ARM::FeatureNEON}, -    {ARM::FPV5_D16, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | -                        ARM::FeatureFPARMv8 | ARM::FeatureD16, -     ARM::FeatureNEON | ARM::FeatureCrypto}, -    {ARM::FP_ARMV8, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | -                        ARM::FeatureFPARMv8, -     ARM::FeatureNEON | ARM::FeatureCrypto}, -    {ARM::NEON, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureNEON, 0}, -    {ARM::NEON_VFPV4, -     ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | ARM::FeatureNEON, -     0}, -    {ARM::NEON_FP_ARMV8, -     ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | +    {/* ID */ ARM::VFP, +     /* Enabled */ ARM::FeatureVFP2, +     /* Disabled */ ARM::FeatureNEON}, +    {/* ID */ ARM::VFPV2, +     /* Enabled */ ARM::FeatureVFP2, +     /* Disabled */ ARM::FeatureNEON}, +    {/* ID */ ARM::VFPV3, +     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3, +     /* Disabled */ ARM::FeatureNEON | ARM::FeatureD16}, +    {/* ID */ ARM::VFPV3_D16, +     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureD16, +     /* Disabled */ ARM::FeatureNEON}, +    {/* ID */ ARM::VFPV4, +     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4, +     /* Disabled */ ARM::FeatureNEON | ARM::FeatureD16}, +    {/* ID */ ARM::VFPV4_D16, +     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | +         ARM::FeatureD16, +     /* Disabled */ ARM::FeatureNEON}, +    {/* ID */ ARM::FPV5_D16, +     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | +         ARM::FeatureFPARMv8 | ARM::FeatureD16, +     /* Disabled */ ARM::FeatureNEON | ARM::FeatureCrypto}, +    {/* ID */ ARM::FP_ARMV8, +     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | +         ARM::FeatureFPARMv8, +     /* Disabled */ ARM::FeatureNEON | ARM::FeatureCrypto | ARM::FeatureD16}, +    {/* ID */ ARM::NEON, +     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureNEON, +     /* Disabled */ ARM::FeatureD16}, +    {/* ID */ ARM::NEON_VFPV4, +     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | +         ARM::FeatureNEON, +     /* Disabled */ ARM::FeatureD16}, +    {/* ID */ ARM::NEON_FP_ARMV8, +     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |           ARM::FeatureFPARMv8 | ARM::FeatureNEON, -     ARM::FeatureCrypto}, -    {ARM::CRYPTO_NEON_FP_ARMV8, -     ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | +     /* Disabled */ ARM::FeatureCrypto | ARM::FeatureD16}, +    {/* ID */ ARM::CRYPTO_NEON_FP_ARMV8, +     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |           ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto, -     0}, +     /* Disabled */ ARM::FeatureD16},      {ARM::SOFTVFP, 0, 0},  }; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index d597cdcbd644..ad3bc1dbeeeb 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -3134,7 +3134,8 @@ def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),  def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src),                      "icbi $src", IIC_LdStICBI, []>; -def EIEIO : XForm_24_eieio<31, 854, (outs), (ins), +// We used to have EIEIO as the value but E[0-9A-Z] is a reserved
name +def EnforceIEIO : XForm_24_eieio<31, 854, (outs), (ins),                             "eieio", IIC_LdStLoad, []>;  def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L), diff --git a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp index e7bc00635f75..0bc62d0ab04d 100644 --- a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp @@ -100,7 +100,7 @@ bool AMDGPUTTI::hasBranchDivergence() const { return true; }  void AMDGPUTTI::getUnrollingPreferences(const Function *, Loop *L,                                          UnrollingPreferences &UP) const {    UP.Threshold = 300; // Twice the default. -  UP.Count = UINT_MAX; +  UP.MaxCount = UINT_MAX;    UP.Partial = true;    // TODO: Do we want runtime unrolling? diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp index 91eb60beb6bb..c99219dd9074 100644 --- a/lib/Target/R600/SIAnnotateControlFlow.cpp +++ b/lib/Target/R600/SIAnnotateControlFlow.cpp @@ -14,6 +14,7 @@  #include "AMDGPU.h"  #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/Analysis/LoopInfo.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/Dominators.h"  #include "llvm/IR/Instructions.h" @@ -66,6 +67,8 @@ class SIAnnotateControlFlow : public FunctionPass {    DominatorTree *DT;    StackVector Stack; +  LoopInfo *LI; +    bool isTopOfStack(BasicBlock *BB);    Value *popSaved(); @@ -99,6 +102,7 @@ public:    }    void getAnalysisUsage(AnalysisUsage &AU) const override { +    AU.addRequired<LoopInfo>();      AU.addRequired<DominatorTreeWrapperPass>();      AU.addPreserved<DominatorTreeWrapperPass>();      FunctionPass::getAnalysisUsage(AU); @@ -277,10 +281,25 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {    Term->setCondition(CallInst::Create(Loop, Arg, "", Term));    push(Term->getSuccessor(0), Arg); -} - -/// \brief Close the last opened control flow +}/// \brief Close the last opened control flow  void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) { +  llvm::Loop *L = LI->getLoopFor(BB); + +  if (L && L->getHeader() == BB) { +    // We can't insert an EndCF call into a loop header, because it will +    // get executed on every iteration of the loop, when it should be +    // executed only once before the loop. +    SmallVector <BasicBlock*, 8> Latches; +    L->getLoopLatches(Latches); + +    std::vector<BasicBlock*> Preds; +    for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { +      if (std::find(Latches.begin(), Latches.end(), *PI) == Latches.end()) +        Preds.push_back(*PI); +    } +    BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", this); +  } +    CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt());  } @@ -288,6 +307,7 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {  /// recognize if/then/else and loops.  
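
The closeControlFlow change above splits the non-latch predecessors of a loop header into a fresh "endcf.split" block, so the EndCf marker runs once on loop entry instead of once per iteration. A standalone sketch of that predecessor filtering follows; Block and predsToSplit are invented illustrations, not the pass's API:

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Illustrative model: when the join block is a loop header, EndCf must not
// execute every iteration, so only the header's non-latch (entering)
// predecessors are redirected through a new block that runs exactly once.
struct Block { std::string Name; };

static std::vector<Block *>
predsToSplit(const std::vector<Block *> &Preds,
             const std::vector<Block *> &Latches) {
  std::vector<Block *> Out;
  for (Block *P : Preds)
    if (std::find(Latches.begin(), Latches.end(), P) == Latches.end())
      Out.push_back(P); // entering edge: route through "endcf.split"
  return Out;
}

int main() {
  Block Entry{"entry"}, Latch{"loop.latch"};
  for (Block *B : predsToSplit({&Entry, &Latch}, {&Latch}))
    std::cout << B->Name << " -> endcf.split\n"; // only "entry" is split
}
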
bool SIAnnotateControlFlow::runOnFunction(Function &F) {    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); +  LI = &getAnalysis<LoopInfo>();    for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),         E = df_end(&F.getEntryBlock()); I != E; ++I) { diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index 0396bf384066..58c2cd109680 100644 --- a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -266,6 +266,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,        break;      case AMDGPU::SI_SPILL_V32_RESTORE:      case AMDGPU::SI_SPILL_V64_RESTORE: +    case AMDGPU::SI_SPILL_V96_RESTORE:      case AMDGPU::SI_SPILL_V128_RESTORE:      case AMDGPU::SI_SPILL_V256_RESTORE:      case AMDGPU::SI_SPILL_V512_RESTORE: { diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index ab3319afe93f..30b3b2876b8a 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -132,9 +132,9 @@ def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",  def FeatureXOP     : SubtargetFeature<"xop", "HasXOP", "true",                                        "Enable XOP instructions",                                        [FeatureFMA4]>; -def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem", -                                          "HasVectorUAMem", "true", -                 "Allow unaligned memory operands on vector/SIMD instructions">; +def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem", +                                          "HasSSEUnalignedMem", "true", +                      "Allow unaligned memory operands with SSE instructions">;  def FeatureAES     : SubtargetFeature<"aes", "HasAES", "true",                                        "Enable AES instructions",                                        [FeatureSSE2]>; @@ -309,7 +309,6 @@ class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [                                         FeatureCMPXCHG16B,                                         FeatureFastUAMem,                                         FeatureSlowUAMem32, -                                       FeatureVectorUAMem,                                         FeaturePOPCNT,                                         FeatureAES,                                         FeaturePCLMUL @@ -322,7 +321,6 @@ class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [                                       FeatureCMPXCHG16B,                                       FeatureFastUAMem,                                       FeatureSlowUAMem32, -                                     FeatureVectorUAMem,                                       FeaturePOPCNT,                                       FeatureAES,                                       FeaturePCLMUL, @@ -337,7 +335,6 @@ class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [                                     FeatureAVX2,                                     FeatureCMPXCHG16B,                                     FeatureFastUAMem, -                                   FeatureVectorUAMem,                                     FeaturePOPCNT,                                     FeatureAES,                                     FeaturePCLMUL, @@ -360,7 +357,6 @@ class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [                                       FeatureAVX2,                                       FeatureCMPXCHG16B,                                       
FeatureFastUAMem, -                                     FeatureVectorUAMem,                                       FeaturePOPCNT,                                       FeatureAES,                                       FeaturePCLMUL, @@ -388,7 +384,7 @@ class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,                        FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,                        FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,                        FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE, -                      FeatureSlowIncDec, FeatureVectorUAMem]>; +                      FeatureSlowIncDec]>;  def : KnightsLandingProc<"knl">;  // FIXME: define SKX model @@ -399,7 +395,7 @@ class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel,                        FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,                        FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,                        FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE, -                      FeatureSlowIncDec, FeatureSGX, FeatureVectorUAMem]>; +                      FeatureSlowIncDec, FeatureSGX]>;  def : SkylakeProc<"skylake">;  def : SkylakeProc<"skx">; // Legacy alias. diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 14a0f38df668..d06e94fbfe1f 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -688,11 +688,11 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {      std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;      for (const auto &Function : M) -      if (Function.hasDLLExportStorageClass()) +      if (Function.hasDLLExportStorageClass() && !Function.isDeclaration())          DLLExportedFns.push_back(getSymbol(&Function));      for (const auto &Global : M.globals()) -      if (Global.hasDLLExportStorageClass()) +      if (Global.hasDLLExportStorageClass() && !Global.isDeclaration())          DLLExportedGlobals.push_back(getSymbol(&Global));      for (const auto &Alias : M.aliases()) { diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a1fd34ea8000..78a11e64fef0 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5473,6 +5473,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,      if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {        DecodePSHUFBMask(C, Mask); +      if (Mask.empty()) +        return false;        break;      } diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 76e8fad78de3..7069bd68e00e 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -424,7 +424,7 @@ def alignedloadv8i64  : PatFrag<(ops node:$ptr),  // setting a feature bit in the processor (on startup, for example).  // Opteron 10h and later implement such a feature.  
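
The memop fragment just below gates folding a load into an SSE instruction on either the renamed subtarget flag or a known 16-byte alignment. A standalone model of that test follows; the Subtarget struct and canFoldLoad are illustrative stand-ins, not the TableGen predicate itself:

#include <iostream>

// Illustrative model: an SSE instruction may take a memory operand only
// when the target tolerates unaligned SSE memory accesses or the load is
// known to be 16-byte aligned; otherwise a separate unaligned load (e.g.
// movups) must be emitted first.
struct Subtarget { bool HasSSEUnalignedMem; };

static bool canFoldLoad(const Subtarget &ST, unsigned AlignInBytes) {
  return ST.HasSSEUnalignedMem || AlignInBytes >= 16;
}

int main() {
  Subtarget Generic{false}, Tolerant{true};
  std::cout << canFoldLoad(Generic, 8)          // 0: keep the load separate
            << canFoldLoad(Generic, 16)         // 1: aligned, safe to fold
            << canFoldLoad(Tolerant, 8) << '\n'; // 1: feature bit set
}
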
def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{ -  return    Subtarget->hasVectorUAMem() +  return    Subtarget->hasSSEUnalignedMem()           || cast<LoadSDNode>(N)->getAlignment() >= 16;  }]>; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index e59395c06a5c..983169886ad3 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -265,7 +265,7 @@ void X86Subtarget::initializeEnvironment() {    IsSHLDSlow = false;    IsUAMemFast = false;    IsUAMem32Slow = false; -  HasVectorUAMem = false; +  HasSSEUnalignedMem = false;    HasCmpxchg16b = false;    UseLeaForSP = false;    HasSlowDivide32 = false; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 754b5b924717..e0263d66e928 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -162,9 +162,9 @@ protected:    /// True if unaligned 32-byte memory accesses are slow.    bool IsUAMem32Slow; -  /// HasVectorUAMem - True if SIMD operations can have unaligned memory -  /// operands. This may require setting a feature bit in the processor. -  bool HasVectorUAMem; +  /// True if SSE operations can have unaligned memory operands. +  /// This may require setting a configuration bit in the processor. +  bool HasSSEUnalignedMem;    /// HasCmpxchg16b - True if this processor has the CMPXCHG16B instruction;    /// this is true for most x86-64 chips, but not the first AMD chips. @@ -378,7 +378,7 @@ public:    bool isSHLDSlow() const { return IsSHLDSlow; }    bool isUnalignedMemAccessFast() const { return IsUAMemFast; }    bool isUnalignedMem32Slow() const { return IsUAMem32Slow; } -  bool hasVectorUAMem() const { return HasVectorUAMem; } +  bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }    bool hasCmpxchg16b() const { return HasCmpxchg16b; }    bool useLeaForSP() const { return UseLeaForSP; }    bool hasSlowDivide32() const { return HasSlowDivide32; } diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index af1694d3453c..6230c000cf57 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -330,11 +330,17 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT      case LLVMContext::MD_noalias:      case LLVMContext::MD_nontemporal:      case LLVMContext::MD_mem_parallel_loop_access: -    case LLVMContext::MD_nonnull:        // All of these directly apply.        NewLoad->setMetadata(ID, N);        break; +    case LLVMContext::MD_nonnull: +      // FIXME: We should translate this into range metadata for integer types +      // and vice versa. +      if (NewTy->isPointerTy()) +        NewLoad->setMetadata(ID, N); +      break; +      case LLVMContext::MD_range:        // FIXME: It would be nice to propagate this in some way, but the type        // conversions make it hard. @@ -548,13 +554,14 @@ static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {        case LLVMContext::MD_noalias:        case LLVMContext::MD_nontemporal:        case LLVMContext::MD_mem_parallel_loop_access: -      case LLVMContext::MD_nonnull:          // All of these directly apply.          NewStore->setMetadata(ID, N);          break;        case LLVMContext::MD_invariant_load: +      case LLVMContext::MD_nonnull:        case LLVMContext::MD_range: +        // These don't apply for stores.          
break;        }      } diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 745c85a98e2f..25f1f022c40c 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -67,7 +67,7 @@ static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000;  static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 36;  static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30;  static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46; -static const uint64_t kWindowsShadowOffset32 = 1ULL << 30; +static const uint64_t kWindowsShadowOffset32 = 3ULL << 28;  static const size_t kMinStackMallocSize = 1 << 6;  // 64B  static const size_t kMaxStackMallocSize = 1 << 16;  // 64K diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index 5f73b89e8551..2a3d15421de9 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -71,9 +71,17 @@ private:      return isMachO() ? "__DATA,__llvm_prf_data" : "__llvm_prf_data";    } +  /// Get the section name for the coverage mapping data. +  StringRef getCoverageSection() const { +    return isMachO() ? "__DATA,__llvm_covmap" : "__llvm_covmap"; +  } +    /// Replace instrprof_increment with an increment of the appropriate value.    void lowerIncrement(InstrProfIncrementInst *Inc); +  /// Set up the section and uses for coverage data and its references. +  void lowerCoverageData(GlobalVariable *CoverageData); +    /// Get the region counters for an increment, creating them if necessary.    ///    /// If the counter array doesn't yet exist, the profile data variables @@ -118,6 +126,10 @@ bool InstrProfiling::runOnModule(Module &M) {            lowerIncrement(Inc);            MadeChange = true;          } +  if (GlobalVariable *Coverage = M.getNamedGlobal("__llvm_coverage_mapping")) { +    lowerCoverageData(Coverage); +    MadeChange = true; +  }    if (!MadeChange)      return false; @@ -140,6 +152,35 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {    Inc->eraseFromParent();  } +void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) { +  CoverageData->setSection(getCoverageSection()); +  CoverageData->setAlignment(8); + +  Constant *Init = CoverageData->getInitializer(); +  // We're expecting { i32, i32, i32, i32, [n x { i8*, i32, i32 }], [m x i8] } +  // for some C. If not, the frontend's given us something broken. +  assert(Init->getNumOperands() == 6 && "bad number of fields in coverage map"); +  assert(isa<ConstantArray>(Init->getAggregateElement(4)) && +         "invalid function list in coverage map"); +  ConstantArray *Records = cast<ConstantArray>(Init->getAggregateElement(4)); +  for (unsigned I = 0, E = Records->getNumOperands(); I < E; ++I) { +    Constant *Record = Records->getOperand(I); +    Value *V = const_cast<Value *>(Record->getOperand(0))->stripPointerCasts(); + +    assert(isa<GlobalVariable>(V) && "Missing reference to function name"); +    GlobalVariable *Name = cast<GlobalVariable>(V); + +    // If we have region counters for this name, we've already handled it. +    auto It = RegionCounters.find(Name); +    if (It != RegionCounters.end()) +      continue; + +    // Move the name variable to the right section. +    Name->setSection(getNameSection()); +    Name->setAlignment(1); +  } +} +  /// Get the name of a profiling variable for a particular function.  
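
The lowerCoverageData routine above is mostly section bookkeeping: every coverage record references a function-name variable, and any name not already placed by counter lowering gets moved into the name section. A standalone sketch of that loop follows; NameVar and the section string are assumptions for illustration, not the real API:

#include <iostream>
#include <set>
#include <string>
#include <vector>

// Illustrative model of the record walk: names whose counters were already
// lowered are skipped, the rest are relocated (mirroring
// Name->setSection(getNameSection()) above).
struct NameVar { std::string Ident; std::string Section; };

static void lowerCoverage(const std::vector<NameVar *> &Records,
                          const std::set<NameVar *> &AlreadyLowered,
                          const std::string &NameSection) {
  for (NameVar *N : Records) {
    if (AlreadyLowered.count(N))
      continue;               // increment lowering already placed this name
    N->Section = NameSection; // assumed section label
  }
}

int main() {
  NameVar A{"main", ""}, B{"helper", "names"};
  lowerCoverage({&A, &B}, {&B}, "names");
  std::cout << A.Ident << " -> " << A.Section << '\n'; // main -> names
}
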
static std::string getVarName(InstrProfIncrementInst *Inc, StringRef VarName) {    auto *Arr = cast<ConstantDataArray>(Inc->getName()->getInitializer()); diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 9f00d3d6c824..d7d752ff5243 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -631,7 +631,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {        if (SI.isAtomic()) SI.setOrdering(addReleaseOrdering(SI.getOrdering())); -      if (MS.TrackOrigins) +      if (MS.TrackOrigins && !SI.isAtomic())          storeOrigin(IRB, Addr, Shadow, getOrigin(Val), SI.getAlignment(),                      InstrumentWithCalls);      } diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 394b0d3de7bd..969b9a8f8df1 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -480,6 +480,9 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {        // Ignore volatile loads.        if (!LI->isSimple()) {          LastStore = nullptr; +        // Don't CSE across synchronization boundaries. +        if (Inst->mayWriteToMemory()) +          ++CurrentGeneration;          continue;        } diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 33b5f9df5a27..3eea3d4e27ae 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -750,6 +750,16 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,    // its dependence information by changing its parameter.    MD->removeInstruction(C); +  // Update AA metadata +  // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be +  // handled here, but combineMetadata doesn't support them yet +  unsigned KnownIDs[] = { +    LLVMContext::MD_tbaa, +    LLVMContext::MD_alias_scope, +    LLVMContext::MD_noalias, +  }; +  combineMetadata(C, cpy, KnownIDs); +    // Remove the memcpy.    
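
The KnownIDs combine above exists because the call is taking over the memcpy's memory effects: alias metadata may only survive on the call if it held for both instructions, which for noalias scope lists means intersection. A toy model of that intersection follows; ScopeList is an invented stand-in for the metadata operands, not LLVM API:

#include <iostream>
#include <set>
#include <string>

// Illustrative model: keep only the scopes both instructions agreed on,
// so no stale noalias claim from either side survives the rewrite.
using ScopeList = std::set<std::string>;

static ScopeList intersectScopes(const ScopeList &A, const ScopeList &B) {
  ScopeList Out;
  for (const std::string &S : A)
    if (B.count(S))
      Out.insert(S);
  return Out;
}

int main() {
  ScopeList CallScopes{"!scope.a", "!scope.b"}, MemcpyScopes{"!scope.b"};
  for (const std::string &S : intersectScopes(CallScopes, MemcpyScopes))
    std::cout << S << '\n'; // only !scope.b survives
}
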
MD->removeInstruction(cpy);    ++NumMemCpyInstr; diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 08a4b3f3b737..2a84d7e0c484 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -1328,6 +1328,8 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsign          K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD));          break;        case LLVMContext::MD_alias_scope: +        K->setMetadata(Kind, MDNode::getMostGenericAliasScope(JMD, KMD)); +        break;        case LLVMContext::MD_noalias:          K->setMetadata(Kind, MDNode::intersect(JMD, KMD));          break; diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index 477fba42412e..65f2ae2f9429 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -154,19 +154,21 @@ static Metadata *mapToSelf(ValueToValueMapTy &VM, const Metadata *MD) {    return mapToMetadata(VM, MD, const_cast<Metadata *>(MD));  } -static Metadata *MapMetadataImpl(const Metadata *MD, ValueToValueMapTy &VM, -                                 RemapFlags Flags, +static Metadata *MapMetadataImpl(const Metadata *MD, +                                 SmallVectorImpl<UniquableMDNode *> &Cycles, +                                 ValueToValueMapTy &VM, RemapFlags Flags,                                   ValueMapTypeRemapper *TypeMapper,                                   ValueMaterializer *Materializer); -static Metadata *mapMetadataOp(Metadata *Op, ValueToValueMapTy &VM, -                                 RemapFlags Flags, -                                 ValueMapTypeRemapper *TypeMapper, -                                 ValueMaterializer *Materializer) { +static Metadata *mapMetadataOp(Metadata *Op, +                               SmallVectorImpl<UniquableMDNode *> &Cycles, +                               ValueToValueMapTy &VM, RemapFlags Flags, +                               ValueMapTypeRemapper *TypeMapper, +                               ValueMaterializer *Materializer) {    if (!Op)      return nullptr;    if (Metadata *MappedOp = -          MapMetadataImpl(Op, VM, Flags, TypeMapper, Materializer)) +          MapMetadataImpl(Op, Cycles, VM, Flags, TypeMapper, Materializer))      return MappedOp;    // Use identity map if MappedOp is null and we can ignore missing entries.    
if (Flags & RF_IgnoreMissingEntries) @@ -180,8 +182,9 @@ static Metadata *mapMetadataOp(Metadata *Op, ValueToValueMapTy &VM,    return nullptr;  } -static Metadata *cloneMDTuple(const MDTuple *Node, ValueToValueMapTy &VM, -                              RemapFlags Flags, +static Metadata *cloneMDTuple(const MDTuple *Node, +                              SmallVectorImpl<UniquableMDNode *> &Cycles, +                              ValueToValueMapTy &VM, RemapFlags Flags,                                ValueMapTypeRemapper *TypeMapper,                                ValueMaterializer *Materializer,                                bool IsDistinct) { @@ -192,41 +195,57 @@ static Metadata *cloneMDTuple(const MDTuple *Node, ValueToValueMapTy &VM,    SmallVector<Metadata *, 4> Elts;    Elts.reserve(Node->getNumOperands());    for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) -    Elts.push_back(mapMetadataOp(Node->getOperand(I), VM, Flags, TypeMapper, -                                 Materializer)); +    Elts.push_back(mapMetadataOp(Node->getOperand(I), Cycles, VM, Flags, +                                 TypeMapper, Materializer));    return MDTuple::get(Node->getContext(), Elts);  } -static Metadata *cloneMDLocation(const MDLocation *Node, ValueToValueMapTy &VM, -                                 RemapFlags Flags, +static Metadata *cloneMDLocation(const MDLocation *Node, +                                 SmallVectorImpl<UniquableMDNode *> &Cycles, +                                 ValueToValueMapTy &VM, RemapFlags Flags,                                   ValueMapTypeRemapper *TypeMapper,                                   ValueMaterializer *Materializer,                                   bool IsDistinct) {    return (IsDistinct ? MDLocation::getDistinct : MDLocation::get)(        Node->getContext(), Node->getLine(), Node->getColumn(), -      mapMetadataOp(Node->getScope(), VM, Flags, TypeMapper, Materializer), -      mapMetadataOp(Node->getInlinedAt(), VM, Flags, TypeMapper, Materializer)); +      mapMetadataOp(Node->getScope(), Cycles, VM, Flags, TypeMapper, +                    Materializer), +      mapMetadataOp(Node->getInlinedAt(), Cycles, VM, Flags, TypeMapper, +                    Materializer));  } -static Metadata *cloneMDNode(const UniquableMDNode *Node, ValueToValueMapTy &VM, -                             RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, +static Metadata *cloneMDNode(const UniquableMDNode *Node, +                             SmallVectorImpl<UniquableMDNode *> &Cycles, +                             ValueToValueMapTy &VM, RemapFlags Flags, +                             ValueMapTypeRemapper *TypeMapper,                               ValueMaterializer *Materializer, bool IsDistinct) {    switch (Node->getMetadataID()) {    default:      llvm_unreachable("Invalid UniquableMDNode subclass");  #define HANDLE_UNIQUABLE_LEAF(CLASS)                                           \    case Metadata::CLASS##Kind:                                                  \ -    return clone##CLASS(cast<CLASS>(Node), VM, Flags, TypeMapper,              \ +    return clone##CLASS(cast<CLASS>(Node), Cycles, VM, Flags, TypeMapper,      \                          Materializer, IsDistinct);  #include "llvm/IR/Metadata.def"    }  } +static void +trackCyclesUnderDistinct(const UniquableMDNode *Node, +                         SmallVectorImpl<UniquableMDNode *> &Cycles) { +  // Track any cycles beneath this node. 
+  for (Metadata *Op : Node->operands()) +    if (auto *N = dyn_cast_or_null<UniquableMDNode>(Op)) +      if (!N->isResolved()) +        Cycles.push_back(N); +} +  /// \brief Map a distinct MDNode.  ///  /// Distinct nodes are not uniqued, so they must always recreated.  static Metadata *mapDistinctNode(const UniquableMDNode *Node, +                                 SmallVectorImpl<UniquableMDNode *> &Cycles,                                   ValueToValueMapTy &VM, RemapFlags Flags,                                   ValueMapTypeRemapper *TypeMapper,                                   ValueMaterializer *Materializer) { @@ -241,9 +260,11 @@ static Metadata *mapDistinctNode(const UniquableMDNode *Node,      // Fix the operands.      for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) -      NewMD->replaceOperandWith(I, mapMetadataOp(Node->getOperand(I), VM, Flags, -                                                 TypeMapper, Materializer)); +      NewMD->replaceOperandWith(I, +                                mapMetadataOp(Node->getOperand(I), Cycles, VM, +                                              Flags, TypeMapper, Materializer)); +    trackCyclesUnderDistinct(NewMD, Cycles);      return NewMD;    } @@ -252,9 +273,11 @@ static Metadata *mapDistinctNode(const UniquableMDNode *Node,    std::unique_ptr<MDNodeFwdDecl> Dummy(        MDNode::getTemporary(Node->getContext(), None));    mapToMetadata(VM, Node, Dummy.get()); -  Metadata *NewMD = cloneMDNode(Node, VM, Flags, TypeMapper, Materializer, -                                /* IsDistinct */ true); +  auto *NewMD = cast<UniquableMDNode>(cloneMDNode(Node, Cycles, VM, Flags, +                                                  TypeMapper, Materializer, +                                                  /* IsDistinct */ true));    Dummy->replaceAllUsesWith(NewMD); +  trackCyclesUnderDistinct(NewMD, Cycles);    return mapToMetadata(VM, Node, NewMD);  } @@ -263,13 +286,14 @@ static Metadata *mapDistinctNode(const UniquableMDNode *Node,  /// Check whether a uniqued node needs to be remapped (due to any operands  /// changing).  static bool shouldRemapUniquedNode(const UniquableMDNode *Node, +                                   SmallVectorImpl<UniquableMDNode *> &Cycles,                                     ValueToValueMapTy &VM, RemapFlags Flags,                                     ValueMapTypeRemapper *TypeMapper,                                     ValueMaterializer *Materializer) {    // Check all operands to see if any need to be remapped.    for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) {      Metadata *Op = Node->getOperand(I); -    if (Op != mapMetadataOp(Op, VM, Flags, TypeMapper, Materializer)) +    if (Op != mapMetadataOp(Op, Cycles, VM, Flags, TypeMapper, Materializer))        return true;    }    return false; @@ -279,9 +303,10 @@ static bool shouldRemapUniquedNode(const UniquableMDNode *Node,  ///  /// Uniqued nodes may not need to be recreated (they may map to themselves).  
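
The mapUniquedNode function that follows hinges on one fast path: if remapping changes no operand, the node maps to itself and nothing is cloned. A self-contained model of that decision follows; Node here is just a vector of ints standing in for MDNode, not the real class:

#include <iostream>
#include <map>
#include <vector>

// Illustrative model: remap each operand; if nothing changed, return the
// original node (identity mapping), otherwise materialize a clone.
using Node = std::vector<int>;

static const Node *remapUniqued(const Node &N, const std::map<int, int> &VM,
                                std::vector<Node> &Storage) {
  bool Changed = false;
  Node Clone;
  for (int Op : N) {
    auto It = VM.find(Op);
    int Mapped = (It == VM.end()) ? Op : It->second;
    Changed |= (Mapped != Op);
    Clone.push_back(Mapped);
  }
  if (!Changed)
    return &N;              // identity mapping, no new node created
  Storage.push_back(Clone); // at least one operand changed: recreate
  return &Storage.back();
}

int main() {
  std::vector<Node> Storage;
  Node N{1, 2, 3};
  std::cout << (remapUniqued(N, {}, Storage) == &N)               // 1
            << (remapUniqued(N, {{2, 9}}, Storage) == &N) << '\n'; // 0
}
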
static Metadata *mapUniquedNode(const UniquableMDNode *Node, -                                 ValueToValueMapTy &VM, RemapFlags Flags, -                                 ValueMapTypeRemapper *TypeMapper, -                                 ValueMaterializer *Materializer) { +                                SmallVectorImpl<UniquableMDNode *> &Cycles, +                                ValueToValueMapTy &VM, RemapFlags Flags, +                                ValueMapTypeRemapper *TypeMapper, +                                ValueMaterializer *Materializer) {    assert(!Node->isDistinct() && "Expected uniqued node");    // Create a dummy node in case we have a metadata cycle. @@ -289,7 +314,8 @@ static Metadata *mapUniquedNode(const UniquableMDNode *Node,    mapToMetadata(VM, Node, Dummy);    // Check all operands to see if any need to be remapped. -  if (!shouldRemapUniquedNode(Node, VM, Flags, TypeMapper, Materializer)) { +  if (!shouldRemapUniquedNode(Node, Cycles, VM, Flags, TypeMapper, +                              Materializer)) {      // Use an identity mapping.      mapToSelf(VM, Node);      MDNode::deleteTemporary(Dummy); @@ -297,15 +323,17 @@ static Metadata *mapUniquedNode(const UniquableMDNode *Node,    }    // At least one operand needs remapping. -  Metadata *NewMD = cloneMDNode(Node, VM, Flags, TypeMapper, Materializer, -                                /* IsDistinct */ false); +  Metadata *NewMD = +      cloneMDNode(Node, Cycles, VM, Flags, TypeMapper, Materializer, +                  /* IsDistinct */ false);    Dummy->replaceAllUsesWith(NewMD);    MDNode::deleteTemporary(Dummy);    return mapToMetadata(VM, Node, NewMD);  } -static Metadata *MapMetadataImpl(const Metadata *MD, ValueToValueMapTy &VM, -                                 RemapFlags Flags, +static Metadata *MapMetadataImpl(const Metadata *MD, +                                 SmallVectorImpl<UniquableMDNode *> &Cycles, +                                 ValueToValueMapTy &VM, RemapFlags Flags,                                   ValueMapTypeRemapper *TypeMapper,                                   ValueMaterializer *Materializer) {    // If the value already exists in the map, use it. @@ -345,18 +373,30 @@ static Metadata *MapMetadataImpl(const Metadata *MD, ValueToValueMapTy &VM,      return mapToSelf(VM, MD);    if (Node->isDistinct()) -    return mapDistinctNode(Node, VM, Flags, TypeMapper, Materializer); +    return mapDistinctNode(Node, Cycles, VM, Flags, TypeMapper, Materializer); -  return mapUniquedNode(Node, VM, Flags, TypeMapper, Materializer); +  return mapUniquedNode(Node, Cycles, VM, Flags, TypeMapper, Materializer);  }  Metadata *llvm::MapMetadata(const Metadata *MD, ValueToValueMapTy &VM,                              RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,                              ValueMaterializer *Materializer) { -  Metadata *NewMD = MapMetadataImpl(MD, VM, Flags, TypeMapper, Materializer); -  if (NewMD && NewMD != MD) +  SmallVector<UniquableMDNode *, 8> Cycles; +  Metadata *NewMD = +      MapMetadataImpl(MD, Cycles, VM, Flags, TypeMapper, Materializer); + +  // Resolve cycles underneath MD. +  if (NewMD && NewMD != MD) {      if (auto *N = dyn_cast<UniquableMDNode>(NewMD))        N->resolveCycles(); + +    for (UniquableMDNode *N : Cycles) +      N->resolveCycles(); +  } else { +    // Shouldn't get unresolved cycles if nothing was remapped. 
+    assert(Cycles.empty() && "Expected no unresolved cycles"); +  } +    return NewMD;  } diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 4834782ecc14..bd8a4b3fd3d0 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -75,6 +75,18 @@ static const unsigned MinVecRegSize = 128;  static const unsigned RecursionMaxDepth = 12; +/// \brief Predicate for the element types that the SLP vectorizer supports. +/// +/// The most important thing to filter here are types which are invalid in LLVM +/// vectors. We also filter target specific types which have absolutely no +/// meaningful vectorization path such as x86_fp80 and ppc_f128. This just +/// avoids spending time checking the cost model and realizing that they will +/// be inevitably scalarized. +static bool isValidElementType(Type *Ty) { +  return VectorType::isValidElementType(Ty) && !Ty->isX86_FP80Ty() && +         !Ty->isPPC_FP128Ty(); +} +  /// \returns the parent basic block if all of the instructions in \p VL  /// are in the same block or null otherwise.  static BasicBlock *getSameBlock(ArrayRef<Value *> VL) { @@ -208,6 +220,8 @@ static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) {          MD = MDNode::getMostGenericTBAA(MD, IMD);          break;        case LLVMContext::MD_alias_scope: +        MD = MDNode::getMostGenericAliasScope(MD, IMD); +        break;        case LLVMContext::MD_noalias:          MD = MDNode::intersect(MD, IMD);          break; @@ -1214,7 +1228,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {        Type *SrcTy = VL0->getOperand(0)->getType();        for (unsigned i = 0; i < VL.size(); ++i) {          Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType(); -        if (Ty != SrcTy || Ty->isAggregateType() || Ty->isVectorTy()) { +        if (Ty != SrcTy || !isValidElementType(Ty)) {            BS.cancelScheduling(VL);            newTreeEntry(VL, false);            DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n"); @@ -3128,7 +3142,7 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {      // Check that the pointer points to scalars.      Type *Ty = SI->getValueOperand()->getType(); -    if (Ty->isAggregateType() || Ty->isVectorTy()) +    if (!isValidElementType(Ty))        continue;      // Find the base pointer. 
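
A self-contained model of the isValidElementType filter introduced above; the Kind enum is an invented stand-in for llvm::Type, and the real predicate also defers to VectorType::isValidElementType:

#include <iostream>

// Illustrative model: SLP only vectorizes scalar element types that make
// sense inside a vector, so aggregates, existing vectors, and padded
// target types like x86_fp80 or ppc_fp128 are rejected up front, before
// any cost modeling runs.
enum class Kind { I32, F32, F64, X86_FP80, PPC_FP128, Vector, Struct };

static bool isValidElementType(Kind K) {
  switch (K) {
  case Kind::I32:
  case Kind::F32:
  case Kind::F64:
    return true;  // plain scalars vectorize fine
  default:
    return false; // never a profitable (or even legal) vector element
  }
}

int main() {
  std::cout << isValidElementType(Kind::F32)
            << isValidElementType(Kind::X86_FP80) << '\n'; // 1 then 0
}
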
@@ -3169,7 +3183,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,    for (int i = 0, e = VL.size(); i < e; ++i) {      Type *Ty = VL[i]->getType(); -    if (Ty->isAggregateType() || Ty->isVectorTy()) +    if (!isValidElementType(Ty))        return false;      Instruction *Inst = dyn_cast<Instruction>(VL[i]);      if (!Inst || Inst->getOpcode() != Opcode0) @@ -3389,7 +3403,7 @@ public:        return false;      Type *Ty = B->getType(); -    if (Ty->isVectorTy()) +    if (!isValidElementType(Ty))        return false;      ReductionOpcode = B->getOpcode(); diff --git a/test/Bindings/llvm-c/Inputs/invalid.ll.bc b/test/Bindings/llvm-c/Inputs/invalid.ll.bc Binary files differnew file mode 100644 index 000000000000..a85c3644b3ab --- /dev/null +++ b/test/Bindings/llvm-c/Inputs/invalid.ll.bc diff --git a/test/Bindings/llvm-c/invalid-bitcode.test b/test/Bindings/llvm-c/invalid-bitcode.test new file mode 100644 index 000000000000..6318a9bf13d9 --- /dev/null +++ b/test/Bindings/llvm-c/invalid-bitcode.test @@ -0,0 +1,3 @@ +; RUN: not llvm-c-test --module-dump < %S/Inputs/invalid.ll.bc 2>&1 | FileCheck %s + +CHECK: Error parsing bitcode: Unknown attribute kind (48) diff --git a/test/CodeGen/AArch64/setcc-type-mismatch.ll b/test/CodeGen/AArch64/setcc-type-mismatch.ll new file mode 100644 index 000000000000..86817fa4fa40 --- /dev/null +++ b/test/CodeGen/AArch64/setcc-type-mismatch.ll @@ -0,0 +1,11 @@ +; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s + +define void @test_mismatched_setcc(<4 x i22> %l, <4 x i22> %r, <4 x i1>* %addr) { +; CHECK-LABEL: test_mismatched_setcc: +; CHECK: cmeq [[CMP128:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: xtn {{v[0-9]+}}.4h, [[CMP128]].4s + +  %tst = icmp eq <4 x i22> %l, %r +  store <4 x i1> %tst, <4 x i1>* %addr +  ret void +} diff --git a/test/CodeGen/ARM/Windows/read-only-data.ll b/test/CodeGen/ARM/Windows/read-only-data.ll index 0ccb5ededff2..0438d68b55c6 100644 --- a/test/CodeGen/ARM/Windows/read-only-data.ll +++ b/test/CodeGen/ARM/Windows/read-only-data.ll @@ -10,6 +10,6 @@ entry:    ret void  } -; CHECK: .section .rdata,"rd" +; CHECK: .section .rdata,"dr"  ; CHECK-NOT: .section ".rodata.str1.1" diff --git a/test/CodeGen/ARM/Windows/structors.ll b/test/CodeGen/ARM/Windows/structors.ll index a1a90265c03a..874b5bf35b81 100644 --- a/test/CodeGen/ARM/Windows/structors.ll +++ b/test/CodeGen/ARM/Windows/structors.ll @@ -7,6 +7,6 @@ entry:    ret void  } -; CHECK: .section .CRT$XCU,"rd" +; CHECK: .section .CRT$XCU,"dr"  ; CHECK: .long function diff --git a/test/CodeGen/ARM/alloc-no-stack-realign.ll b/test/CodeGen/ARM/alloc-no-stack-realign.ll index 5ad87191efe9..24c28baff881 100644 --- a/test/CodeGen/ARM/alloc-no-stack-realign.ll +++ b/test/CodeGen/ARM/alloc-no-stack-realign.ll @@ -9,8 +9,8 @@  define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" {  entry:  ; NO-REALIGN-LABEL: test1 -; NO-REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]] -; NO-REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! 
+; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1:[0-9]+]]:128] +; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #16  ; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]  ; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32  ; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] @@ -21,14 +21,16 @@ entry:  ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]  ; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32  ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]! +; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #16 +; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]  ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]  ; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0:0]], #48  ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]  ; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0]], #32  ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]! +; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #16 +; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]  ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]   %retval = alloca <16 x float>, align 16   %0 = load <16 x float>* @T3_retval, align 16 @@ -42,8 +44,8 @@ define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp {  entry:  ; REALIGN-LABEL: test2  ; REALIGN: bfc sp, #0, #6 -; REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]] -; REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! +; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1:[0-9]+]]:128] +; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #16  ; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]  ; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32  ; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] @@ -63,7 +65,8 @@ entry:  ; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]  ; REALIGN: add r[[R1:[0-9]+]], r[[R0]], #32  ; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]! +; REALIGN: add r[[R1:[0-9]+]], r[[R0]], #16 +; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]  ; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]   %retval = alloca <16 x float>, align 16   %0 = load <16 x float>* @T3_retval, align 16 diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll index 33ac4e125633..84ce4a7f0e79 100644 --- a/test/CodeGen/ARM/memcpy-inline.ll +++ b/test/CodeGen/ARM/memcpy-inline.ll @@ -46,8 +46,10 @@ entry:  ; CHECK: movw [[REG2:r[0-9]+]], #16716  ; CHECK: movt [[REG2:r[0-9]+]], #72  ; CHECK: str [[REG2]], [r0, #32] -; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! -; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]! +; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] +; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] +; CHECK: adds r0, #16 +; CHECK: adds r1, #16  ; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]  ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false) @@ -57,8 +59,10 @@ entry:  define void @t3(i8* nocapture %C) nounwind {  entry:  ; CHECK-LABEL: t3: -; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! -; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]! 
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] +; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] +; CHECK: adds r0, #16 +; CHECK: adds r1, #16  ; CHECK: vld1.8 {d{{[0-9]+}}}, [r1]  ; CHECK: vst1.8 {d{{[0-9]+}}}, [r0]    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false) @@ -69,8 +73,7 @@ define void @t4(i8* nocapture %C) nounwind {  entry:  ; CHECK-LABEL: t4:  ; CHECK: vld1.8 {[[REG3:d[0-9]+]], [[REG4:d[0-9]+]]}, [r1] -; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]! -; CHECK: strh [[REG5:r[0-9]+]], [r0] +; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false)    ret void  } diff --git a/test/CodeGen/ARM/setcc-type-mismatch.ll b/test/CodeGen/ARM/setcc-type-mismatch.ll new file mode 100644 index 000000000000..2cfdba12db54 --- /dev/null +++ b/test/CodeGen/ARM/setcc-type-mismatch.ll @@ -0,0 +1,11 @@ +; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s + +define void @test_mismatched_setcc(<4 x i22> %l, <4 x i22> %r, <4 x i1>* %addr) { +; CHECK-LABEL: test_mismatched_setcc: +; CHECK: vceq.i32 [[CMP128:q[0-9]+]], {{q[0-9]+}}, {{q[0-9]+}} +; CHECK: vmovn.i32 {{d[0-9]+}}, [[CMP128]] + +  %tst = icmp eq <4 x i22> %l, %r +  store <4 x i1> %tst, <4 x i1>* %addr +  ret void +} diff --git a/test/CodeGen/ARM/sub-cmp-peephole.ll b/test/CodeGen/ARM/sub-cmp-peephole.ll index 19727dabf09e..f7328dc580ef 100644 --- a/test/CodeGen/ARM/sub-cmp-peephole.ll +++ b/test/CodeGen/ARM/sub-cmp-peephole.ll @@ -88,6 +88,19 @@ if.end11:                                         ; preds = %num2long.exit    ret i32 23  } +; When considering the producer of cmp's src as the subsuming instruction, +; only consider that when the comparison is to 0. +define i32 @cmp_src_nonzero(i32 %a, i32 %b, i32 %x, i32 %y) { +entry: +; CHECK-LABEL: cmp_src_nonzero: +; CHECK: sub +; CHECK: cmp +  %sub = sub i32 %a, %b +  %cmp = icmp eq i32 %sub, 17 +  %ret = select i1 %cmp, i32 %x, i32 %y +  ret i32 %ret +} +  define float @float_sel(i32 %a, i32 %b, float %x, float %y) {  entry:  ; CHECK-LABEL: float_sel: @@ -144,3 +157,50 @@ entry:    store i32 %sub, i32* @t    ret double %ret  } + +declare void @abort() +declare void @exit(i32) + +; If the comparison uses the V bit (signed overflow/underflow), we can't +; omit the comparison. +define i32 @cmp_slt0(i32 %a, i32 %b, i32 %x, i32 %y) { +entry: +; CHECK-LABEL: cmp_slt0 +; CHECK: sub +; CHECK: cmp +; CHECK: bge +  %load = load i32* @t, align 4 +  %sub = sub i32 %load, 17 +  %cmp = icmp slt i32 %sub, 0 +  br i1 %cmp, label %if.then, label %if.else + +if.then: +  call void @abort() +  unreachable + +if.else: +  call void @exit(i32 0) +  unreachable +} + +; Same for the C bit. (Note the ult X, 0 is trivially +; false, so the DAG combiner may or may not optimize it). 
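
The tests around this point pin down when the peephole may reuse the flags a sub already set: only for comparisons against zero, and not for predicates that read the V or C bit. A standalone sketch of that legality check follows; canReuseSubFlags is an invented model, not the pass's code:

#include <cstdint>
#include <iostream>

// Illustrative model: `sub` followed by `cmp` can drop the cmp only when
// the comparison is against 0 and the branch predicate does not depend on
// the overflow (V) or carry (C) bit, per the comments in the tests.
static bool canReuseSubFlags(int64_t CmpImm, bool UsesOverflowBits) {
  if (CmpImm != 0)
    return false; // cmp against 17 etc. tests a different condition
  if (UsesOverflowBits)
    return false; // slt/ult style predicates still need a real cmp
  return true;
}

int main() {
  std::cout << canReuseSubFlags(0, false)       // 1: eq/ne against 0
            << canReuseSubFlags(17, false)      // 0
            << canReuseSubFlags(0, true) << '\n'; // 0
}
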
+define i32 @cmp_ult0(i32 %a, i32 %b, i32 %x, i32 %y) { +entry: +; CHECK-LABEL: cmp_ult0 +; CHECK: sub +; CHECK: cmp +; CHECK: bhs +  %load = load i32* @t, align 4 +  %sub = sub i32 %load, 17 +  %cmp = icmp ult i32 %sub, 0 +  br i1 %cmp, label %if.then, label %if.else + +if.then: +  call void @abort() +  unreachable + +if.else: +  call void @exit(i32 0) +  unreachable +} diff --git a/test/CodeGen/ARM/vector-load.ll b/test/CodeGen/ARM/vector-load.ll deleted file mode 100644 index 008bd1f6f8c8..000000000000 --- a/test/CodeGen/ARM/vector-load.ll +++ /dev/null @@ -1,253 +0,0 @@ -; RUN: llc < %s | FileCheck %s - -target datalayout = "e-m:o-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" -target triple = "thumbv7s-apple-ios8.0.0" - -define <8 x i8> @load_v8i8(<8 x i8>** %ptr) { -;CHECK-LABEL: load_v8i8: -;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}] -	%A = load <8 x i8>** %ptr -	%lA = load <8 x i8>* %A, align 1 -	ret <8 x i8> %lA -} - -define <8 x i8> @load_v8i8_update(<8 x i8>** %ptr) { -;CHECK-LABEL: load_v8i8_update: -;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]! -	%A = load <8 x i8>** %ptr -	%lA = load <8 x i8>* %A, align 1 -	%inc = getelementptr <8 x i8>* %A, i38 1 -        store <8 x i8>* %inc, <8 x i8>** %ptr -	ret <8 x i8> %lA -} - -define <4 x i16> @load_v4i16(<4 x i16>** %ptr) { -;CHECK-LABEL: load_v4i16: -;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}] -	%A = load <4 x i16>** %ptr -	%lA = load <4 x i16>* %A, align 1 -	ret <4 x i16> %lA -} - -define <4 x i16> @load_v4i16_update(<4 x i16>** %ptr) { -;CHECK-LABEL: load_v4i16_update: -;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]! -	%A = load <4 x i16>** %ptr -	%lA = load <4 x i16>* %A, align 1 -	%inc = getelementptr <4 x i16>* %A, i34 1 -        store <4 x i16>* %inc, <4 x i16>** %ptr -	ret <4 x i16> %lA -} - -define <2 x i32> @load_v2i32(<2 x i32>** %ptr) { -;CHECK-LABEL: load_v2i32: -;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}] -	%A = load <2 x i32>** %ptr -	%lA = load <2 x i32>* %A, align 1 -	ret <2 x i32> %lA -} - -define <2 x i32> @load_v2i32_update(<2 x i32>** %ptr) { -;CHECK-LABEL: load_v2i32_update: -;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]! -	%A = load <2 x i32>** %ptr -	%lA = load <2 x i32>* %A, align 1 -	%inc = getelementptr <2 x i32>* %A, i32 1 -        store <2 x i32>* %inc, <2 x i32>** %ptr -	ret <2 x i32> %lA -} - -define <2 x float> @load_v2f32(<2 x float>** %ptr) { -;CHECK-LABEL: load_v2f32: -;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}] -	%A = load <2 x float>** %ptr -	%lA = load <2 x float>* %A, align 1 -	ret <2 x float> %lA -} - -define <2 x float> @load_v2f32_update(<2 x float>** %ptr) { -;CHECK-LABEL: load_v2f32_update: -;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]! -	%A = load <2 x float>** %ptr -	%lA = load <2 x float>* %A, align 1 -	%inc = getelementptr <2 x float>* %A, i32 1 -        store <2 x float>* %inc, <2 x float>** %ptr -	ret <2 x float> %lA -} - -define <1 x i64> @load_v1i64(<1 x i64>** %ptr) { -;CHECK-LABEL: load_v1i64: -;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}] -	%A = load <1 x i64>** %ptr -	%lA = load <1 x i64>* %A, align 1 -	ret <1 x i64> %lA -} - -define <1 x i64> @load_v1i64_update(<1 x i64>** %ptr) { -;CHECK-LABEL: load_v1i64_update: -;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]! 
-	%A = load <1 x i64>** %ptr -	%lA = load <1 x i64>* %A, align 1 -	%inc = getelementptr <1 x i64>* %A, i31 1 -        store <1 x i64>* %inc, <1 x i64>** %ptr -	ret <1 x i64> %lA -} - -define <16 x i8> @load_v16i8(<16 x i8>** %ptr) { -;CHECK-LABEL: load_v16i8: -;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}] -	%A = load <16 x i8>** %ptr -	%lA = load <16 x i8>* %A, align 1 -	ret <16 x i8> %lA -} - -define <16 x i8> @load_v16i8_update(<16 x i8>** %ptr) { -;CHECK-LABEL: load_v16i8_update: -;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! -	%A = load <16 x i8>** %ptr -	%lA = load <16 x i8>* %A, align 1 -	%inc = getelementptr <16 x i8>* %A, i316 1 -        store <16 x i8>* %inc, <16 x i8>** %ptr -	ret <16 x i8> %lA -} - -define <8 x i16> @load_v8i16(<8 x i16>** %ptr) { -;CHECK-LABEL: load_v8i16: -;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}] -	%A = load <8 x i16>** %ptr -	%lA = load <8 x i16>* %A, align 1 -	ret <8 x i16> %lA -} - -define <8 x i16> @load_v8i16_update(<8 x i16>** %ptr) { -;CHECK-LABEL: load_v8i16_update: -;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! -	%A = load <8 x i16>** %ptr -	%lA = load <8 x i16>* %A, align 1 -	%inc = getelementptr <8 x i16>* %A, i38 1 -        store <8 x i16>* %inc, <8 x i16>** %ptr -	ret <8 x i16> %lA -} - -define <4 x i32> @load_v4i32(<4 x i32>** %ptr) { -;CHECK-LABEL: load_v4i32: -;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}] -	%A = load <4 x i32>** %ptr -	%lA = load <4 x i32>* %A, align 1 -	ret <4 x i32> %lA -} - -define <4 x i32> @load_v4i32_update(<4 x i32>** %ptr) { -;CHECK-LABEL: load_v4i32_update: -;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! -	%A = load <4 x i32>** %ptr -	%lA = load <4 x i32>* %A, align 1 -	%inc = getelementptr <4 x i32>* %A, i34 1 -        store <4 x i32>* %inc, <4 x i32>** %ptr -	ret <4 x i32> %lA -} - -define <4 x float> @load_v4f32(<4 x float>** %ptr) { -;CHECK-LABEL: load_v4f32: -;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}] -	%A = load <4 x float>** %ptr -	%lA = load <4 x float>* %A, align 1 -	ret <4 x float> %lA -} - -define <4 x float> @load_v4f32_update(<4 x float>** %ptr) { -;CHECK-LABEL: load_v4f32_update: -;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! -	%A = load <4 x float>** %ptr -	%lA = load <4 x float>* %A, align 1 -	%inc = getelementptr <4 x float>* %A, i34 1 -        store <4 x float>* %inc, <4 x float>** %ptr -	ret <4 x float> %lA -} - -define <2 x i64> @load_v2i64(<2 x i64>** %ptr) { -;CHECK-LABEL: load_v2i64: -;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}] -	%A = load <2 x i64>** %ptr -	%lA = load <2 x i64>* %A, align 1 -	ret <2 x i64> %lA -} - -define <2 x i64> @load_v2i64_update(<2 x i64>** %ptr) { -;CHECK-LABEL: load_v2i64_update: -;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! -	%A = load <2 x i64>** %ptr -	%lA = load <2 x i64>* %A, align 1 -	%inc = getelementptr <2 x i64>* %A, i32 1 -        store <2 x i64>* %inc, <2 x i64>** %ptr -	ret <2 x i64> %lA -} - -; Make sure we change the type to match alignment if necessary. -define <2 x i64> @load_v2i64_update_aligned2(<2 x i64>** %ptr) { -;CHECK-LABEL: load_v2i64_update_aligned2: -;CHECK: vld1.16 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! -	%A = load <2 x i64>** %ptr -	%lA = load <2 x i64>* %A, align 2 -	%inc = getelementptr <2 x i64>* %A, i32 1 -        store <2 x i64>* %inc, <2 x i64>** %ptr -	ret <2 x i64> %lA -} - -define <2 x i64> @load_v2i64_update_aligned4(<2 x i64>** %ptr) { -;CHECK-LABEL: load_v2i64_update_aligned4: -;CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! 
-	%A = load <2 x i64>** %ptr
-	%lA = load <2 x i64>* %A, align 4
-	%inc = getelementptr <2 x i64>* %A, i32 1
-        store <2 x i64>* %inc, <2 x i64>** %ptr
-	ret <2 x i64> %lA
-}
-
-define <2 x i64> @load_v2i64_update_aligned8(<2 x i64>** %ptr) {
-;CHECK-LABEL: load_v2i64_update_aligned8:
-;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:64]!
-	%A = load <2 x i64>** %ptr
-	%lA = load <2 x i64>* %A, align 8
-	%inc = getelementptr <2 x i64>* %A, i32 1
-        store <2 x i64>* %inc, <2 x i64>** %ptr
-	ret <2 x i64> %lA
-}
-
-define <2 x i64> @load_v2i64_update_aligned16(<2 x i64>** %ptr) {
-;CHECK-LABEL: load_v2i64_update_aligned16:
-;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:128]!
-	%A = load <2 x i64>** %ptr
-	%lA = load <2 x i64>* %A, align 16
-	%inc = getelementptr <2 x i64>* %A, i32 1
-        store <2 x i64>* %inc, <2 x i64>** %ptr
-	ret <2 x i64> %lA
-}
-
-; Make sure we don't break smaller-than-dreg extloads.
-define <4 x i32> @zextload_v8i8tov8i32(<4 x i8>** %ptr) {
-;CHECK-LABEL: zextload_v8i8tov8i32:
-;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [{{r[0-9]+}}:32]
-;CHECK: vmovl.u8        {{q[0-9]+}}, {{d[0-9]+}}
-;CHECK: vmovl.u16       {{q[0-9]+}}, {{d[0-9]+}}
-	%A = load <4 x i8>** %ptr
-	%lA = load <4 x i8>* %A, align 4
-        %zlA = zext <4 x i8> %lA to <4 x i32>
-	ret <4 x i32> %zlA
-}
-
-define <4 x i32> @zextload_v8i8tov8i32_fake_update(<4 x i8>** %ptr) {
-;CHECK-LABEL: zextload_v8i8tov8i32_fake_update:
-;CHECK: ldr.w   r[[PTRREG:[0-9]+]], [r0]
-;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [r[[PTRREG]]:32]
-;CHECK: add.w   r[[INCREG:[0-9]+]], r[[PTRREG]], #16
-;CHECK: str.w   r[[INCREG]], [r0]
-;CHECK: vmovl.u8        {{q[0-9]+}}, {{d[0-9]+}}
-;CHECK: vmovl.u16       {{q[0-9]+}}, {{d[0-9]+}}
-	%A = load <4 x i8>** %ptr
-	%lA = load <4 x i8>* %A, align 4
-	%inc = getelementptr <4 x i8>* %A, i38 4
-        store <4 x i8>* %inc, <4 x i8>** %ptr
-        %zlA = zext <4 x i8> %lA to <4 x i32>
-	ret <4 x i32> %zlA
-}
diff --git a/test/CodeGen/ARM/vector-store.ll b/test/CodeGen/ARM/vector-store.ll
deleted file mode 100644
index 9036a31d141d..000000000000
--- a/test/CodeGen/ARM/vector-store.ll
+++ /dev/null
@@ -1,258 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-
-target datalayout = "e-m:o-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
-target triple = "thumbv7s-apple-ios8.0.0"
-
-define void @store_v8i8(<8 x i8>** %ptr, <8 x i8> %val) {
-;CHECK-LABEL: store_v8i8:
-;CHECK: str r1, [r0]
-	%A = load <8 x i8>** %ptr
-	store  <8 x i8> %val, <8 x i8>* %A, align 1
-	ret void
-}
-
-define void @store_v8i8_update(<8 x i8>** %ptr, <8 x i8> %val) {
-;CHECK-LABEL: store_v8i8_update:
-;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <8 x i8>** %ptr
-	store  <8 x i8> %val, <8 x i8>* %A, align 1
-	%inc = getelementptr <8 x i8>* %A, i38 1
-        store <8 x i8>* %inc, <8 x i8>** %ptr
-	ret void
-}
-
-define void @store_v4i16(<4 x i16>** %ptr, <4 x i16> %val) {
-;CHECK-LABEL: store_v4i16:
-;CHECK: str r1, [r0]
-	%A = load <4 x i16>** %ptr
-	store  <4 x i16> %val, <4 x i16>* %A, align 1
-	ret void
-}
-
-define void @store_v4i16_update(<4 x i16>** %ptr, <4 x i16> %val) {
-;CHECK-LABEL: store_v4i16_update:
-;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <4 x i16>** %ptr
-	store  <4 x i16> %val, <4 x i16>* %A, align 1
-	%inc = getelementptr <4 x i16>* %A, i34 1
-        store <4 x i16>* %inc, <4 x i16>** %ptr
-	ret void
-}
-
-define void @store_v2i32(<2 x i32>** %ptr, <2 x i32> %val) {
-;CHECK-LABEL: store_v2i32:
-;CHECK: str r1, [r0]
-	%A = load <2 x i32>** %ptr
-	store  <2 x i32> %val, <2 x i32>* %A, align 1
-	ret void
-}
-
-define void @store_v2i32_update(<2 x i32>** %ptr, <2 x i32> %val) {
-;CHECK-LABEL: store_v2i32_update:
-;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x i32>** %ptr
-	store  <2 x i32> %val, <2 x i32>* %A, align 1
-	%inc = getelementptr <2 x i32>* %A, i32 1
-        store <2 x i32>* %inc, <2 x i32>** %ptr
-	ret void
-}
-
-define void @store_v2f32(<2 x float>** %ptr, <2 x float> %val) {
-;CHECK-LABEL: store_v2f32:
-;CHECK: str r1, [r0]
-	%A = load <2 x float>** %ptr
-	store  <2 x float> %val, <2 x float>* %A, align 1
-	ret void
-}
-
-define void @store_v2f32_update(<2 x float>** %ptr, <2 x float> %val) {
-;CHECK-LABEL: store_v2f32_update:
-;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x float>** %ptr
-	store  <2 x float> %val, <2 x float>* %A, align 1
-	%inc = getelementptr <2 x float>* %A, i32 1
-        store <2 x float>* %inc, <2 x float>** %ptr
-	ret void
-}
-
-define void @store_v1i64(<1 x i64>** %ptr, <1 x i64> %val) {
-;CHECK-LABEL: store_v1i64:
-;CHECK: str r1, [r0]
-	%A = load <1 x i64>** %ptr
-	store  <1 x i64> %val, <1 x i64>* %A, align 1
-	ret void
-}
-
-define void @store_v1i64_update(<1 x i64>** %ptr, <1 x i64> %val) {
-;CHECK-LABEL: store_v1i64_update:
-;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <1 x i64>** %ptr
-	store  <1 x i64> %val, <1 x i64>* %A, align 1
-	%inc = getelementptr <1 x i64>* %A, i31 1
-        store <1 x i64>* %inc, <1 x i64>** %ptr
-	ret void
-}
-
-define void @store_v16i8(<16 x i8>** %ptr, <16 x i8> %val) {
-;CHECK-LABEL: store_v16i8:
-;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <16 x i8>** %ptr
-	store  <16 x i8> %val, <16 x i8>* %A, align 1
-	ret void
-}
-
-define void @store_v16i8_update(<16 x i8>** %ptr, <16 x i8> %val) {
-;CHECK-LABEL: store_v16i8_update:
-;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <16 x i8>** %ptr
-	store  <16 x i8> %val, <16 x i8>* %A, align 1
-	%inc = getelementptr <16 x i8>* %A, i316 1
-        store <16 x i8>* %inc, <16 x i8>** %ptr
-	ret void
-}
-
-define void @store_v8i16(<8 x i16>** %ptr, <8 x i16> %val) {
-;CHECK-LABEL: store_v8i16:
-;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <8 x i16>** %ptr
-	store  <8 x i16> %val, <8 x i16>* %A, align 1
-	ret void
-}
-
-define void @store_v8i16_update(<8 x i16>** %ptr, <8 x i16> %val) {
-;CHECK-LABEL: store_v8i16_update:
-;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <8 x i16>** %ptr
-	store  <8 x i16> %val, <8 x i16>* %A, align 1
-	%inc = getelementptr <8 x i16>* %A, i38 1
-        store <8 x i16>* %inc, <8 x i16>** %ptr
-	ret void
-}
-
-define void @store_v4i32(<4 x i32>** %ptr, <4 x i32> %val) {
-;CHECK-LABEL: store_v4i32:
-;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <4 x i32>** %ptr
-	store  <4 x i32> %val, <4 x i32>* %A, align 1
-	ret void
-}
-
-define void @store_v4i32_update(<4 x i32>** %ptr, <4 x i32> %val) {
-;CHECK-LABEL: store_v4i32_update:
-;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <4 x i32>** %ptr
-	store  <4 x i32> %val, <4 x i32>* %A, align 1
-	%inc = getelementptr <4 x i32>* %A, i34 1
-        store <4 x i32>* %inc, <4 x i32>** %ptr
-	ret void
-}
-
-define void @store_v4f32(<4 x float>** %ptr, <4 x float> %val) {
-;CHECK-LABEL: store_v4f32:
-;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <4 x float>** %ptr
-	store  <4 x float> %val, <4 x float>* %A, align 1
-	ret void
-}
-
-define void @store_v4f32_update(<4 x float>** %ptr, <4 x float> %val) {
-;CHECK-LABEL: store_v4f32_update:
-;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <4 x float>** %ptr
-	store  <4 x float> %val, <4 x float>* %A, align 1
-	%inc = getelementptr <4 x float>* %A, i34 1
-        store <4 x float>* %inc, <4 x float>** %ptr
-	ret void
-}
-
-define void @store_v2i64(<2 x i64>** %ptr, <2 x i64> %val) {
-;CHECK-LABEL: store_v2i64:
-;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <2 x i64>** %ptr
-	store  <2 x i64> %val, <2 x i64>* %A, align 1
-	ret void
-}
-
-define void @store_v2i64_update(<2 x i64>** %ptr, <2 x i64> %val) {
-;CHECK-LABEL: store_v2i64_update:
-;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x i64>** %ptr
-	store  <2 x i64> %val, <2 x i64>* %A, align 1
-	%inc = getelementptr <2 x i64>* %A, i32 1
-        store <2 x i64>* %inc, <2 x i64>** %ptr
-	ret void
-}
-
-define void @store_v2i64_update_aligned2(<2 x i64>** %ptr, <2 x i64> %val) {
-;CHECK-LABEL: store_v2i64_update_aligned2:
-;CHECK: vst1.16 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x i64>** %ptr
-	store  <2 x i64> %val, <2 x i64>* %A, align 2
-	%inc = getelementptr <2 x i64>* %A, i32 1
-        store <2 x i64>* %inc, <2 x i64>** %ptr
-	ret void
-}
-
-define void @store_v2i64_update_aligned4(<2 x i64>** %ptr, <2 x i64> %val) {
-;CHECK-LABEL: store_v2i64_update_aligned4:
-;CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x i64>** %ptr
-	store  <2 x i64> %val, <2 x i64>* %A, align 4
-	%inc = getelementptr <2 x i64>* %A, i32 1
-        store <2 x i64>* %inc, <2 x i64>** %ptr
-	ret void
-}
-
-define void @store_v2i64_update_aligned8(<2 x i64>** %ptr, <2 x i64> %val) {
-;CHECK-LABEL: store_v2i64_update_aligned8:
-;CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:64]!
-	%A = load <2 x i64>** %ptr
-	store  <2 x i64> %val, <2 x i64>* %A, align 8
-	%inc = getelementptr <2 x i64>* %A, i32 1
-        store <2 x i64>* %inc, <2 x i64>** %ptr
-	ret void
-}
-
-define void @store_v2i64_update_aligned16(<2 x i64>** %ptr, <2 x i64> %val) {
-;CHECK-LABEL: store_v2i64_update_aligned16:
-;CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:128]!
-	%A = load <2 x i64>** %ptr
-	store  <2 x i64> %val, <2 x i64>* %A, align 16
-	%inc = getelementptr <2 x i64>* %A, i32 1
-        store <2 x i64>* %inc, <2 x i64>** %ptr
-	ret void
-}
-
-define void @truncstore_v4i32tov4i8(<4 x i8>** %ptr, <4 x i32> %val) {
-;CHECK-LABEL: truncstore_v4i32tov4i8:
-;CHECK: ldr.w   r9, [sp]
-;CHECK: vmov    {{d[0-9]+}}, r3, r9
-;CHECK: vmov    {{d[0-9]+}}, r1, r2
-;CHECK: vmovn.i32       [[VECLO:d[0-9]+]], {{q[0-9]+}}
-;CHECK: vuzp.8  [[VECLO]], {{d[0-9]+}}
-;CHECK: ldr     r[[PTRREG:[0-9]+]], [r0]
-;CHECK: vst1.32 {[[VECLO]][0]}, [r[[PTRREG]]:32]
-	%A = load <4 x i8>** %ptr
-        %trunc = trunc <4 x i32> %val to <4 x i8>
-	store  <4 x i8> %trunc, <4 x i8>* %A, align 4
-	ret void
-}
-
-define void @truncstore_v4i32tov4i8_fake_update(<4 x i8>** %ptr, <4 x i32> %val) {
-;CHECK-LABEL: truncstore_v4i32tov4i8_fake_update:
-;CHECK: ldr.w   r9, [sp]
-;CHECK: vmov    {{d[0-9]+}}, r3, r9
-;CHECK: vmov    {{d[0-9]+}}, r1, r2
-;CHECK: movs    [[IMM16:r[0-9]+]], #16
-;CHECK: vmovn.i32       [[VECLO:d[0-9]+]], {{q[0-9]+}}
-;CHECK: vuzp.8  [[VECLO]], {{d[0-9]+}}
-;CHECK: ldr     r[[PTRREG:[0-9]+]], [r0]
-;CHECK: vst1.32 {[[VECLO]][0]}, [r[[PTRREG]]:32], [[IMM16]]
-;CHECK: str     r[[PTRREG]], [r0]
-	%A = load <4 x i8>** %ptr
-        %trunc = trunc <4 x i32> %val to <4 x i8>
-	store  <4 x i8> %trunc, <4 x i8>* %A, align 4
-	%inc = getelementptr <4 x i8>* %A, i38 4
-        store <4 x i8>* %inc, <4 x i8>** %ptr
-	ret void
-}
diff --git a/test/CodeGen/PowerPC/vsel-prom.ll b/test/CodeGen/PowerPC/vsel-prom.ll
new file mode 100644
index 000000000000..dd219ec0da6f
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsel-prom.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @Compute_Lateral() #0 {
+entry:
+  br i1 undef, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  unreachable
+
+if.end:                                           ; preds = %entry
+  %0 = select i1 undef, <2 x double> undef, <2 x double> zeroinitializer
+  %1 = extractelement <2 x double> %0, i32 1
+  store double %1, double* undef, align 8
+  ret void
+
+; CHECK-LABEL: @Compute_Lateral
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/R600/endcf-loop-header.ll b/test/CodeGen/R600/endcf-loop-header.ll
new file mode 100644
index 000000000000..e3c5b3c1c364
--- /dev/null
+++ b/test/CodeGen/R600/endcf-loop-header.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
+
+; This tests that the llvm.SI.end.cf intrinsic is not inserted into the
+; loop block.  This intrinsic will be lowered to s_or_b64 by the code
+; generator.
+
+; CHECK-LABEL: {{^}}test:
+
+; This was lowered from the llvm.SI.end.cf intrinsic:
+; CHECK: s_or_b64 exec, exec
+
+; CHECK: [[LOOP_LABEL:[0-9A-Za-z_]+]]: ; %loop{{$}}
+; CHECK-NOT: s_or_b64 exec, exec
+; CHECK: s_cbranch_execnz [[LOOP_LABEL]]
+define void @test(i32 addrspace(1)* %out, i32 %cond) {
+entry:
+  %tmp0 = icmp eq i32 %cond, 0
+  br i1 %tmp0, label %if, label %loop
+
+if:
+  store i32 0, i32 addrspace(1)* %out
+  br label %loop
+
+loop:
+  %tmp1 = phi i32 [0, %entry], [0, %if], [%inc, %loop]
+  %inc = add i32 %tmp1, %cond
+  %tmp2 = icmp ugt i32 %inc, 10
+  br i1 %tmp2, label %done, label %loop
+
+done:
+  %tmp3 = getelementptr i32 addrspace(1)* %out, i64 1
+  store i32 %inc, i32 addrspace(1)* %tmp3
+  ret void
+}
diff --git a/test/CodeGen/R600/tti-unroll-prefs.ll b/test/CodeGen/R600/tti-unroll-prefs.ll
new file mode 100644
index 000000000000..0009c42f79bc
--- /dev/null
+++ b/test/CodeGen/R600/tti-unroll-prefs.ll
@@ -0,0 +1,58 @@
+; RUN: opt -loop-unroll -S -mtriple=amdgcn-- -mcpu=SI %s | FileCheck %s
+
+; This IR comes from this OpenCL C code:
+;
+; if (b + 4 > a) {
+;   for (int i = 0; i < 4; i++, b++) {
+;     if (b + 1 <= a)
+;       *(dst + c + b) = 0;
+;     else
+;       break;
+;   }
+; }
+;
+; This test is meant to check that this loop isn't unrolled into more than
+; four iterations.  The loop unrolling preferences we currently use cause this
+; loop to not be unrolled at all, but that may change in the future.
+
+; CHECK-LABEL: @test
+; CHECK: store i8 0, i8 addrspace(1)*
+; CHECK-NOT: store i8 0, i8 addrspace(1)*
+; CHECK: ret void
+define void @test(i8 addrspace(1)* nocapture %dst, i32 %a, i32 %b, i32 %c) {
+entry:
+  %add = add nsw i32 %b, 4
+  %cmp = icmp sgt i32 %add, %a
+  br i1 %cmp, label %for.cond.preheader, label %if.end7
+
+for.cond.preheader:                               ; preds = %entry
+  %cmp313 = icmp slt i32 %b, %a
+  br i1 %cmp313, label %if.then4.lr.ph, label %if.end7.loopexit
+
+if.then4.lr.ph:                                   ; preds = %for.cond.preheader
+  %0 = sext i32 %c to i64
+  br label %if.then4
+
+if.then4:                                         ; preds = %if.then4.lr.ph, %if.then4
+  %i.015 = phi i32 [ 0, %if.then4.lr.ph ], [ %inc, %if.then4 ]
+  %b.addr.014 = phi i32 [ %b, %if.then4.lr.ph ], [ %add2, %if.then4 ]
+  %add2 = add nsw i32 %b.addr.014, 1
+  %1 = sext i32 %b.addr.014 to i64
+  %add.ptr.sum = add nsw i64 %1, %0
+  %add.ptr5 = getelementptr inbounds i8 addrspace(1)* %dst, i64 %add.ptr.sum
+  store i8 0, i8 addrspace(1)* %add.ptr5, align 1
+  %inc = add nsw i32 %i.015, 1
+  %cmp1 = icmp slt i32 %inc, 4
+  %cmp3 = icmp slt i32 %add2, %a
+  %or.cond = and i1 %cmp3, %cmp1
+  br i1 %or.cond, label %if.then4, label %for.cond.if.end7.loopexit_crit_edge
+
+for.cond.if.end7.loopexit_crit_edge:              ; preds = %if.then4
+  br label %if.end7.loopexit
+
+if.end7.loopexit:                                 ; preds = %for.cond.if.end7.loopexit_crit_edge, %for.cond.preheader
+  br label %if.end7
+
+if.end7:                                          ; preds = %if.end7.loopexit, %entry
+  ret void
+}
diff --git a/test/CodeGen/X86/coff-comdat.ll b/test/CodeGen/X86/coff-comdat.ll
index dcbbe1097d53..44e1cb236e91 100644
--- a/test/CodeGen/X86/coff-comdat.ll
+++ b/test/CodeGen/X86/coff-comdat.ll
@@ -73,20 +73,20 @@ $vftable = comdat largest
 ; CHECK: .globl  @v8@0
 ; CHECK: .section        .text,"xr",discard,@f8@0
 ; CHECK: .globl  @f8@0
-; CHECK: .section        .bss,"wb",associative,_f1
+; CHECK: .section        .bss,"bw",associative,_f1
 ; CHECK: .globl  _v1
-; CHECK: .section        .bss,"wb",associative,_f2
+; CHECK: .section        .bss,"bw",associative,_f2
 ; CHECK: .globl  _v2
-; CHECK: .section        .bss,"wb",associative,_f3
+; CHECK: .section        .bss,"bw",associative,_f3
 ; CHECK: .globl  _v3
-; CHECK: .section        .bss,"wb",associative,_f4
+; CHECK: .section        .bss,"bw",associative,_f4
 ; CHECK: .globl  _v4
-; CHECK: .section        .bss,"wb",associative,_f5
+; CHECK: .section        .bss,"bw",associative,_f5
 ; CHECK: .globl  _v5
-; CHECK: .section        .bss,"wb",associative,_f6
+; CHECK: .section        .bss,"bw",associative,_f6
 ; CHECK: .globl  _v6
-; CHECK: .section        .bss,"wb",same_size,_f6
+; CHECK: .section        .bss,"bw",same_size,_f6
 ; CHECK: .globl  _f6
-; CHECK: .section        .rdata,"rd",largest,_vftable
+; CHECK: .section        .rdata,"dr",largest,_vftable
 ; CHECK: .globl  _vftable
 ; CHECK: _vftable = L_some_name+4
diff --git a/test/CodeGen/X86/constant-combines.ll b/test/CodeGen/X86/constant-combines.ll
new file mode 100644
index 000000000000..d2a6ef4f5d25
--- /dev/null
+++ b/test/CodeGen/X86/constant-combines.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+define void @PR22524({ float, float }* %arg) {
+; Check that we can materialize the zero constants we store in two places here,
+; and at least form a legal store of the floating point value at the end.
+; The DAG combiner at one point contained bugs that, given enough permutations,
+; would incorrectly form an illegal operation for the last of these stores when
+; it folded it to a zero too late to legalize the zero store operation. If this
+; ever starts forming a zero store instead of movss, the test case has stopped
+; being useful.
+;
+; CHECK-LABEL: PR22524:
+entry:
+  %0 = getelementptr inbounds { float, float }* %arg,  i32 0, i32 1
+  store float 0.000000e+00, float* %0, align 4
+; CHECK: movl $0, 4(%rdi)
+
+  %1 = getelementptr inbounds { float, float }* %arg, i64 0,  i32 0
+  %2 = bitcast float* %1 to i64*
+  %3 = load i64* %2, align 8
+  %4 = trunc i64 %3 to i32
+  %5 = lshr i64 %3, 32
+  %6 = trunc i64 %5 to i32
+  %7 = bitcast i32 %6 to float
+  %8 = fmul float %7, 0.000000e+00
+  %9 = bitcast float* %1 to i32*
+  store i32 %6, i32* %9, align 4
+; CHECK: movl $0, (%rdi)
+  store float %8, float* %0, align 4
+; CHECK: movss %{{.*}}, 4(%rdi)
+  ret void
+}
diff --git a/test/CodeGen/X86/dllexport-x86_64.ll b/test/CodeGen/X86/dllexport-x86_64.ll
index c673f5d485f9..cf4557d12716 100644
--- a/test/CodeGen/X86/dllexport-x86_64.ll
+++ b/test/CodeGen/X86/dllexport-x86_64.ll
@@ -40,18 +40,18 @@ define weak_odr dllexport void @weak1() {
 ; CHECK: .globl Var1
 @Var1 = dllexport global i32 1, align 4

-; CHECK: .rdata,"rd"
+; CHECK: .rdata,"dr"
 ; CHECK: .globl Var2
 @Var2 = dllexport unnamed_addr constant i32 1

 ; CHECK: .comm Var3
 @Var3 = common dllexport global i32 0, align 4

-; CHECK: .section .data,"wd",discard,WeakVar1
+; CHECK: .section .data,"dw",discard,WeakVar1
 ; CHECK: .globl WeakVar1
 @WeakVar1 = weak_odr dllexport global i32 1, align 4

-; CHECK: .section .rdata,"rd",discard,WeakVar2
+; CHECK: .section .rdata,"dr",discard,WeakVar2
 ; CHECK: .globl WeakVar2
 @WeakVar2 = weak_odr dllexport unnamed_addr constant i32 1
diff --git a/test/CodeGen/X86/dllexport.ll b/test/CodeGen/X86/dllexport.ll
index 5035aa153301..145b48aaf635 100644
--- a/test/CodeGen/X86/dllexport.ll
+++ b/test/CodeGen/X86/dllexport.ll
@@ -21,6 +21,8 @@ define dllexport void @f2() unnamed_addr {
 	ret void
 }

+declare dllexport void @not_defined()
+
 ; CHECK: .globl _stdfun@0
 define dllexport x86_stdcallcc void @stdfun() nounwind {
 	ret void
@@ -59,18 +61,18 @@ define weak_odr dllexport void @weak1() {
 ; CHECK: .globl _Var1
 @Var1 = dllexport global i32 1, align 4

-; CHECK: .rdata,"rd"
+; CHECK: .rdata,"dr"
 ; CHECK: .globl _Var2
 @Var2 = dllexport unnamed_addr constant i32 1

 ; CHECK: .comm _Var3
 @Var3 = common dllexport global i32 0, align 4

-; CHECK: .section .data,"wd",discard,_WeakVar1
+; CHECK: .section .data,"dw",discard,_WeakVar1
 ; CHECK: .globl _WeakVar1
 @WeakVar1 = weak_odr dllexport global i32 1, align 4

-; CHECK: .section .rdata,"rd",discard,_WeakVar2
+; CHECK: .section .rdata,"dr",discard,_WeakVar2
 ; CHECK: .globl _WeakVar2
 @WeakVar2 = weak_odr dllexport unnamed_addr constant i32 1
@@ -91,7 +93,6 @@ define weak_odr dllexport void @weak1() {
 ; CHECK: _weak_alias = _f1
 @weak_alias = weak_odr dllexport alias void()* @f1
-
 ; CHECK: .section .drectve
 ; CHECK-CL: " /EXPORT:_Var1,DATA"
 ; CHECK-CL: " /EXPORT:_Var2,DATA"
@@ -100,6 +101,7 @@ define weak_odr dllexport void @weak1() {
 ; CHECK-CL: " /EXPORT:_WeakVar2,DATA"
 ; CHECK-CL: " /EXPORT:_f1"
 ; CHECK-CL: " /EXPORT:_f2"
+; CHECK-CL-NOT: not_exported
 ; CHECK-CL: " /EXPORT:_stdfun@0"
 ; CHECK-CL: " /EXPORT:@fastfun@0"
 ; CHECK-CL: " /EXPORT:_thisfun"
@@ -117,6 +119,7 @@ define weak_odr dllexport void @weak1() {
 ; CHECK-GCC: " -export:WeakVar2,data"
 ; CHECK-GCC: " -export:f1"
 ; CHECK-GCC: " -export:f2"
+; CHECK-CL-NOT: not_exported
 ; CHECK-GCC: " -export:stdfun@0"
 ; CHECK-GCC: " -export:@fastfun@0"
 ; CHECK-GCC: " -export:thisfun"
diff --git a/test/CodeGen/X86/fold-vex.ll b/test/CodeGen/X86/fold-vex.ll
index 2bb5b441c7c0..5a8b1d8cbfdf 100644
--- a/test/CodeGen/X86/fold-vex.ll
+++ b/test/CodeGen/X86/fold-vex.ll
@@ -1,16 +1,31 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s
+; Use CPU parameters to ensure that a CPU-specific attribute is not overriding the AVX definition.

-;CHECK: @test
-; No need to load from memory. The operand will be loaded as part of th AND instr.
-;CHECK-NOT: vmovaps
-;CHECK: vandps
-;CHECK: ret
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown                  -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx             | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2                 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown                  -mattr=-avx | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx -mattr=-avx | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2     -mattr=-avx | FileCheck %s --check-prefix=SSE

-define void @test1(<8 x i32>* %p0, <8 x i32> %in1) nounwind {
-entry:
-  %in0 = load <8 x i32>* %p0, align 2
-  %a = and <8 x i32> %in0, %in1
-  store <8 x i32> %a, <8 x i32>* undef
-  ret void
+; No need to load unaligned operand from memory using an explicit instruction with AVX.
+; The operand should be folded into the AND instr.
+
+; With SSE, folding memory operands into math/logic ops requires 16-byte alignment
+; unless specially configured on some CPUs such as AMD Family 10H.
+
+define <4 x i32> @test1(<4 x i32>* %p0, <4 x i32> %in1) nounwind {
+  %in0 = load <4 x i32>* %p0, align 2
+  %a = and <4 x i32> %in0, %in1
+  ret <4 x i32> %a
+
+; CHECK-LABEL: @test1
+; CHECK-NOT:   vmovups
+; CHECK:       vandps (%rdi), %xmm0, %xmm0
+; CHECK-NEXT:  ret
+
+; SSE-LABEL: @test1
+; SSE:       movups (%rdi), %xmm1
+; SSE-NEXT:  andps %xmm1, %xmm0
+; SSE-NEXT:  ret
 }
diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll
index fa1169d8a8e3..d6e45ad79ea9 100644
--- a/test/CodeGen/X86/global-sections.ll
+++ b/test/CodeGen/X86/global-sections.ll
@@ -48,7 +48,7 @@ define void @F1() {
 ; LINUX-SECTIONS: .section        .rodata.G3,"a",@progbits
 ; LINUX-SECTIONS: .globl  G3

-; WIN32-SECTIONS: .section        .rdata,"rd",one_only,_G3
+; WIN32-SECTIONS: .section        .rdata,"dr",one_only,_G3
 ; WIN32-SECTIONS: .globl  _G3
@@ -126,7 +126,7 @@ define void @F1() {
 ; LINUX-SECTIONS: .section        .rodata.G7,"aMS",@progbits,1
 ; LINUX-SECTIONS:       .globl G7

-; WIN32-SECTIONS: .section        .rdata,"rd",one_only,_G7
+; WIN32-SECTIONS: .section        .rdata,"dr",one_only,_G7
 ; WIN32-SECTIONS:       .globl _G7
@@ -189,7 +189,7 @@ define void @F1() {
 ; LINUX-SECTIONS:        .asciz  "foo"
 ; LINUX-SECTIONS:        .size   .LG14, 4

-; WIN32-SECTIONS:        .section        .rdata,"rd"
+; WIN32-SECTIONS:        .section        .rdata,"dr"
 ; WIN32-SECTIONS: L_G14:
 ; WIN32-SECTIONS:        .asciz  "foo"
@@ -211,5 +211,5 @@ define void @F1() {
 ; LINUX-SECTIONS: .section      .rodata.G15,"aM",@progbits,8
 ; LINUX-SECTIONS: G15:

-; WIN32-SECTIONS: .section      .rdata,"rd",one_only,_G15
+; WIN32-SECTIONS: .section      .rdata,"dr",one_only,_G15
 ; WIN32-SECTIONS: _G15:
diff --git a/test/CodeGen/X86/pr15267.ll b/test/CodeGen/X86/pr15267.ll
index b4dc5fd47168..90df9905fe1a 100644
--- a/test/CodeGen/X86/pr15267.ll
+++ b/test/CodeGen/X86/pr15267.ll
@@ -4,8 +4,7 @@ define <4 x i3> @test1(<4 x i3>* %in) nounwind {
   %ret = load <4 x i3>* %in, align 1
   ret <4 x i3> %ret
 }
-
-; CHECK: test1
+; CHECK-LABEL: test1
 ; CHECK: movzwl
 ; CHECK: shrl $3
 ; CHECK: andl $7
@@ -25,7 +24,7 @@ define <4 x i1> @test2(<4 x i1>* %in) nounwind {
   ret <4 x i1> %ret
 }

-; CHECK: test2
+; CHECK-LABEL: test2
 ; CHECK: movzbl
 ; CHECK: shrl
 ; CHECK: andl $1
@@ -46,7 +45,7 @@ define <4 x i64> @test3(<4 x i1>* %in) nounwind {
   ret <4 x i64> %sext
 }

-; CHECK: test3
+; CHECK-LABEL: test3
 ; CHECK: movzbl
 ; CHECK: movq
 ; CHECK: shlq
@@ -67,3 +66,71 @@ define <4 x i64> @test3(<4 x i1>* %in) nounwind {
 ; CHECK: vpunpcklqdq
 ; CHECK: vinsertf128
 ; CHECK: ret
+
+define <16 x i4> @test4(<16 x i4>* %in) nounwind {
+  %ret = load <16 x i4>* %in, align 1
+  ret <16 x i4> %ret
+}
+
+; CHECK-LABEL: test4
+; CHECK: movl
+; CHECK-NEXT: shrl
+; CHECK-NEXT: andl
+; CHECK-NEXT: movl
+; CHECK-NEXT: andl
+; CHECK-NEXT: vmovd
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movl
+; CHECK-NEXT: shrl
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movl
+; CHECK-NEXT: shrl
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movl
+; CHECK-NEXT: shrl
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movl
+; CHECK-NEXT: shrl
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movl
+; CHECK-NEXT: shrl
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movl
+; CHECK-NEXT: shrl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movq
+; CHECK-NEXT: shrq
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movq
+; CHECK-NEXT: shrq
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movq
+; CHECK-NEXT: shrq
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movq
+; CHECK-NEXT: shrq
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movq
+; CHECK-NEXT: shrq
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movq
+; CHECK-NEXT: shrq
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: movq
+; CHECK-NEXT: shrq
+; CHECK-NEXT: andl
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: shrq
+; CHECK-NEXT: vpinsrb
+; CHECK-NEXT: retq
diff --git a/test/CodeGen/X86/pshufb-mask-comments.ll b/test/CodeGen/X86/pshufb-mask-comments.ll
index 303c4a684761..ca5a02ce8d3a 100644
--- a/test/CodeGen/X86/pshufb-mask-comments.ll
+++ b/test/CodeGen/X86/pshufb-mask-comments.ll
@@ -37,4 +37,16 @@ define <16 x i8> @test4(<2 x i64>* %V) {
   ret <16 x i8> %1
 }

+define <16 x i8> @test5() {
+; CHECK-LABEL: test5
+; CHECK: pshufb {{.*}}
+  store <2 x i64> <i64 1, i64 0>, <2 x i64>* undef, align 16
+  %l = load <2 x i64>* undef, align 16
+  %shuffle = shufflevector <2 x i64> %l, <2 x i64> undef, <2 x i32> zeroinitializer
+  store <2 x i64> %shuffle, <2 x i64>* undef, align 16
+  %1 = load <16 x i8>* undef, align 16
+  %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> undef, <16 x i8> %1)
+  ret <16 x i8> %2
+}
+
 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
diff --git a/test/CodeGen/X86/seh-basic.ll b/test/CodeGen/X86/seh-basic.ll
deleted file mode 100644
index 69d70d70948c..000000000000
--- a/test/CodeGen/X86/seh-basic.ll
+++ /dev/null
@@ -1,175 +0,0 @@
-; RUN: llc -mtriple x86_64-pc-windows-msvc < %s | FileCheck %s
-
-define void @two_invoke_merged() {
-entry:
-  invoke void @try_body()
-          to label %again unwind label %lpad
-
-again:
-  invoke void @try_body()
-          to label %done unwind label %lpad
-
-done:
-  ret void
-
-lpad:
-  %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
-          catch i8* bitcast (i32 (i8*, i8*)* @filt0 to i8*)
-          catch i8* bitcast (i32 (i8*, i8*)* @filt1 to i8*)
-  %sel = extractvalue { i8*, i32 } %vals, 1
-  call void @use_selector(i32 %sel)
-  ret void
-}
-
-; Normal path code
-
-; CHECK-LABEL: {{^}}two_invoke_merged:
-; CHECK: .seh_proc two_invoke_merged
-; CHECK: .seh_handler __C_specific_handler, @unwind, @except
-; CHECK: .Ltmp0:
-; CHECK: callq try_body
-; CHECK-NEXT: .Ltmp1:
-; CHECK: .Ltmp2:
-; CHECK: callq try_body
-; CHECK-NEXT: .Ltmp3:
-; CHECK: retq
-
-; Landing pad code
-
-; CHECK: .Ltmp5:
-; CHECK: movl $1, %ecx
-; CHECK: jmp
-; CHECK: .Ltmp6:
-; CHECK: movl $2, %ecx
-; CHECK: callq use_selector
-
-; CHECK: .seh_handlerdata
-; CHECK-NEXT: .long 2
-; CHECK-NEXT: .long .Ltmp0@IMGREL
-; CHECK-NEXT: .long .Ltmp3@IMGREL+1
-; CHECK-NEXT: .long filt0@IMGREL
-; CHECK-NEXT: .long .Ltmp5@IMGREL
-; CHECK-NEXT: .long .Ltmp0@IMGREL
-; CHECK-NEXT: .long .Ltmp3@IMGREL+1
-; CHECK-NEXT: .long filt1@IMGREL
-; CHECK-NEXT: .long .Ltmp6@IMGREL
-; CHECK: .text
-; CHECK: .seh_endproc
-
-define void @two_invoke_gap() {
-entry:
-  invoke void @try_body()
-          to label %again unwind label %lpad
-
-again:
-  call void @do_nothing_on_unwind()
-  invoke void @try_body()
-          to label %done unwind label %lpad
-
-done:
-  ret void
-
-lpad:
-  %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
-          catch i8* bitcast (i32 (i8*, i8*)* @filt0 to i8*)
-  %sel = extractvalue { i8*, i32 } %vals, 1
-  call void @use_selector(i32 %sel)
-  ret void
-}
-
-; Normal path code
-
-; CHECK-LABEL: {{^}}two_invoke_gap:
-; CHECK: .seh_proc two_invoke_gap
-; CHECK: .seh_handler __C_specific_handler, @unwind, @except
-; CHECK: .Ltmp11:
-; CHECK: callq try_body
-; CHECK-NEXT: .Ltmp12:
-; CHECK: callq do_nothing_on_unwind
-; CHECK: .Ltmp13:
-; CHECK: callq try_body
-; CHECK-NEXT: .Ltmp14:
-; CHECK: retq
-
-; Landing pad code
-
-; CHECK: .Ltmp16:
-; CHECK: movl $1, %ecx
-; CHECK: callq use_selector
-
-; CHECK: .seh_handlerdata
-; CHECK-NEXT: .long 2
-; CHECK-NEXT: .long .Ltmp11@IMGREL
-; CHECK-NEXT: .long .Ltmp12@IMGREL+1
-; CHECK-NEXT: .long filt0@IMGREL
-; CHECK-NEXT: .long .Ltmp16@IMGREL
-; CHECK-NEXT: .long .Ltmp13@IMGREL
-; CHECK-NEXT: .long .Ltmp14@IMGREL+1
-; CHECK-NEXT: .long filt0@IMGREL
-; CHECK-NEXT: .long .Ltmp16@IMGREL
-; CHECK: .text
-; CHECK: .seh_endproc
-
-define void @two_invoke_nounwind_gap() {
-entry:
-  invoke void @try_body()
-          to label %again unwind label %lpad
-
-again:
-  call void @cannot_unwind()
-  invoke void @try_body()
-          to label %done unwind label %lpad
-
-done:
-  ret void
-
-lpad:
-  %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
-          catch i8* bitcast (i32 (i8*, i8*)* @filt0 to i8*)
-  %sel = extractvalue { i8*, i32 } %vals, 1
-  call void @use_selector(i32 %sel)
-  ret void
-}
-
-; Normal path code
-
-; CHECK-LABEL: {{^}}two_invoke_nounwind_gap:
-; CHECK: .seh_proc two_invoke_nounwind_gap
-; CHECK: .seh_handler __C_specific_handler, @unwind, @except
-; CHECK: .Ltmp21:
-; CHECK: callq try_body
-; CHECK-NEXT: .Ltmp22:
-; CHECK: callq cannot_unwind
-; CHECK: .Ltmp23:
-; CHECK: callq try_body
-; CHECK-NEXT: .Ltmp24:
-; CHECK: retq
-
-; Landing pad code
-
-; CHECK: .Ltmp26:
-; CHECK: movl $1, %ecx
-; CHECK: callq use_selector
-
-; CHECK: .seh_handlerdata
-; CHECK-NEXT: .long 1
-; CHECK-NEXT: .long .Ltmp21@IMGREL
-; CHECK-NEXT: .long .Ltmp24@IMGREL+1
-; CHECK-NEXT: .long filt0@IMGREL
-; CHECK-NEXT: .long .Ltmp26@IMGREL
-; CHECK: .text
-; CHECK: .seh_endproc
-
-declare void @try_body()
-declare void @do_nothing_on_unwind()
-declare void @cannot_unwind() nounwind
-declare void @use_selector(i32)
-
-declare i32 @filt0(i8* %eh_info, i8* %rsp)
-declare i32 @filt1(i8* %eh_info, i8* %rsp)
-
-declare void @handler0()
-declare void @handler1()
-
-declare i32 @__C_specific_handler(...)
-declare i32 @llvm.eh.typeid.for(i8*) readnone nounwind
diff --git a/test/CodeGen/X86/seh-safe-div.ll b/test/CodeGen/X86/seh-safe-div.ll
deleted file mode 100644
index e911df04ded4..000000000000
--- a/test/CodeGen/X86/seh-safe-div.ll
+++ /dev/null
@@ -1,196 +0,0 @@
-; RUN: llc -mtriple x86_64-pc-windows-msvc < %s | FileCheck %s
-
-; This test case is also intended to be run manually as a complete functional
-; test. It should link, print something, and exit zero rather than crashing.
-; It is the hypothetical lowering of a C source program that looks like:
-;
-;   int safe_div(int *n, int *d) {
-;     int r;
-;     __try {
-;       __try {
-;         r = *n / *d;
-;       } __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION) {
-;         puts("EXCEPTION_ACCESS_VIOLATION");
-;         r = -1;
-;       }
-;     } __except(GetExceptionCode() == EXCEPTION_INT_DIVIDE_BY_ZERO) {
-;       puts("EXCEPTION_INT_DIVIDE_BY_ZERO");
-;       r = -2;
-;     }
-;     return r;
-;   }
-
-@str1 = internal constant [27 x i8] c"EXCEPTION_ACCESS_VIOLATION\00"
-@str2 = internal constant [29 x i8] c"EXCEPTION_INT_DIVIDE_BY_ZERO\00"
-
-define i32 @safe_div(i32* %n, i32* %d) {
-entry:
-  %r = alloca i32, align 4
-  invoke void @try_body(i32* %r, i32* %n, i32* %d)
-          to label %__try.cont unwind label %lpad
-
-lpad:
-  %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
-          catch i8* bitcast (i32 (i8*, i8*)* @safe_div_filt0 to i8*)
-          catch i8* bitcast (i32 (i8*, i8*)* @safe_div_filt1 to i8*)
-  %ehptr = extractvalue { i8*, i32 } %vals, 0
-  %sel = extractvalue { i8*, i32 } %vals, 1
-  %filt0_val = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @safe_div_filt0 to i8*))
-  %is_filt0 = icmp eq i32 %sel, %filt0_val
-  br i1 %is_filt0, label %handler0, label %eh.dispatch1
-
-eh.dispatch1:
-  %filt1_val = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @safe_div_filt1 to i8*))
-  %is_filt1 = icmp eq i32 %sel, %filt1_val
-  br i1 %is_filt1, label %handler1, label %eh.resume
-
-handler0:
-  call void @puts(i8* getelementptr ([27 x i8]* @str1, i32 0, i32 0))
-  store i32 -1, i32* %r, align 4
-  br label %__try.cont
-
-handler1:
-  call void @puts(i8* getelementptr ([29 x i8]* @str2, i32 0, i32 0))
-  store i32 -2, i32* %r, align 4
-  br label %__try.cont
-
-eh.resume:
-  resume { i8*, i32 } %vals
-
-__try.cont:
-  %safe_ret = load i32* %r, align 4
-  ret i32 %safe_ret
-}
-
-; Normal path code
-
-; CHECK: {{^}}safe_div:
-; CHECK: .seh_proc safe_div
-; CHECK: .seh_handler __C_specific_handler, @unwind, @except
-; CHECK: .Ltmp0:
-; CHECK: leaq [[rloc:.*\(%rsp\)]], %rcx
-; CHECK: callq try_body
-; CHECK-NEXT: .Ltmp1
-; CHECK: .LBB0_7:
-; CHECK: movl [[rloc]], %eax
-; CHECK: retq
-
-; Landing pad code
-
-; CHECK: .Ltmp3:
-; CHECK: movl $1, %[[sel:[a-z]+]]
-; CHECK: .Ltmp4
-; CHECK: movl $2, %[[sel]]
-; CHECK: .L{{.*}}:
-; CHECK: cmpl $1, %[[sel]]
-
-; CHECK: # %handler0
-; CHECK: callq puts
-; CHECK: movl $-1, [[rloc]]
-; CHECK: jmp .LBB0_7
-
-; CHECK: cmpl $2, %[[sel]]
-
-; CHECK: # %handler1
-; CHECK: callq puts
-; CHECK: movl $-2, [[rloc]]
-; CHECK: jmp .LBB0_7
-
-; FIXME: EH preparation should not call _Unwind_Resume.
-; CHECK: callq _Unwind_Resume
-; CHECK: ud2
-
-; CHECK: .seh_handlerdata
-; CHECK: .long 2
-; CHECK: .long .Ltmp0@IMGREL
-; CHECK: .long .Ltmp1@IMGREL+1
-; CHECK: .long safe_div_filt0@IMGREL
-; CHECK: .long .Ltmp3@IMGREL
-; CHECK: .long .Ltmp0@IMGREL
-; CHECK: .long .Ltmp1@IMGREL+1
-; CHECK: .long safe_div_filt1@IMGREL
-; CHECK: .long .Ltmp4@IMGREL
-; CHECK: .text
-; CHECK: .seh_endproc
-
-
-define void @try_body(i32* %r, i32* %n, i32* %d) {
-entry:
-  %0 = load i32* %n, align 4
-  %1 = load i32* %d, align 4
-  %div = sdiv i32 %0, %1
-  store i32 %div, i32* %r, align 4
-  ret void
-}
-
-; The prototype of these filter functions is:
-; int filter(EXCEPTION_POINTERS *eh_ptrs, void *rbp);

-; The definition of EXCEPTION_POINTERS is:
-;   typedef struct _EXCEPTION_POINTERS {
-;     EXCEPTION_RECORD *ExceptionRecord;
-;     CONTEXT          *ContextRecord;
-;   } EXCEPTION_POINTERS;

-; The definition of EXCEPTION_RECORD is:
-;   typedef struct _EXCEPTION_RECORD {
-;     DWORD ExceptionCode;
-;     ...
-;   } EXCEPTION_RECORD;

-; The exception code can be retreived with two loads, one for the record
-; pointer and one for the code.  The values of local variables can be
-; accessed via rbp, but that would require additional not yet implemented LLVM
-; support.
-
-define i32 @safe_div_filt0(i8* %eh_ptrs, i8* %rbp) {
-  %eh_ptrs_c = bitcast i8* %eh_ptrs to i32**
-  %eh_rec = load i32** %eh_ptrs_c
-  %eh_code = load i32* %eh_rec
-  ; EXCEPTION_ACCESS_VIOLATION = 0xC0000005
-  %cmp = icmp eq i32 %eh_code, 3221225477
-  %filt.res = zext i1 %cmp to i32
-  ret i32 %filt.res
-}
-
-define i32 @safe_div_filt1(i8* %eh_ptrs, i8* %rbp) {
-  %eh_ptrs_c = bitcast i8* %eh_ptrs to i32**
-  %eh_rec = load i32** %eh_ptrs_c
-  %eh_code = load i32* %eh_rec
-  ; EXCEPTION_INT_DIVIDE_BY_ZERO = 0xC0000094
-  %cmp = icmp eq i32 %eh_code, 3221225620
-  %filt.res = zext i1 %cmp to i32
-  ret i32 %filt.res
-}
-
-@str_result = internal constant [21 x i8] c"safe_div result: %d\0A\00"
-
-define i32 @main() {
-  %d.addr = alloca i32, align 4
-  %n.addr = alloca i32, align 4
-
-  store i32 10, i32* %n.addr, align 4
-  store i32 2, i32* %d.addr, align 4
-  %r1 = call i32 @safe_div(i32* %n.addr, i32* %d.addr)
-  call void (i8*, ...)* @printf(i8* getelementptr ([21 x i8]* @str_result, i32 0, i32 0), i32 %r1)
-
-  store i32 10, i32* %n.addr, align 4
-  store i32 0, i32* %d.addr, align 4
-  %r2 = call i32 @safe_div(i32* %n.addr, i32* %d.addr)
-  call void (i8*, ...)* @printf(i8* getelementptr ([21 x i8]* @str_result, i32 0, i32 0), i32 %r2)
-
-  %r3 = call i32 @safe_div(i32* %n.addr, i32* null)
-  call void (i8*, ...)* @printf(i8* getelementptr ([21 x i8]* @str_result, i32 0, i32 0), i32 %r3)
-  ret i32 0
-}
-
-define void @_Unwind_Resume() {
-  call void @abort()
-  unreachable
-}
-
-declare i32 @__C_specific_handler(...)
-declare i32 @llvm.eh.typeid.for(i8*) readnone nounwind
-declare void @puts(i8*)
-declare void @printf(i8*, ...)
-declare void @abort()
diff --git a/test/CodeGen/X86/2010-01-07-UAMemFeature.ll b/test/CodeGen/X86/sse-unaligned-mem-feature.ll
index bb24adb41817..15f91ee04eaf 100644
--- a/test/CodeGen/X86/2010-01-07-UAMemFeature.ll
+++ b/test/CodeGen/X86/sse-unaligned-mem-feature.ll
@@ -1,5 +1,4 @@
-; RUN: llc -mcpu=yonah -mattr=vector-unaligned-mem -march=x86 < %s | FileCheck %s
-; CHECK: addps (
+; RUN: llc -mcpu=yonah -mattr=sse-unaligned-mem -march=x86 < %s | FileCheck %s

 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -8,4 +7,7 @@ define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind {
 	%A = load <4 x float>* %P, align 4
 	%B = fadd <4 x float> %A, %In
 	ret <4 x float> %B
+
+; CHECK-LABEL: @foo
+; CHECK:       addps (
 }
diff --git a/test/CodeGen/X86/win_cst_pool.ll b/test/CodeGen/X86/win_cst_pool.ll
index e8b853a03dae..d534b126b192 100644
--- a/test/CodeGen/X86/win_cst_pool.ll
+++ b/test/CodeGen/X86/win_cst_pool.ll
@@ -6,7 +6,7 @@ define double @double() {
   ret double 0x0000000000800000
 }
 ; CHECK:              .globl  __real@0000000000800000
-; CHECK-NEXT:         .section        .rdata,"rd",discard,__real@0000000000800000
+; CHECK-NEXT:         .section        .rdata,"dr",discard,__real@0000000000800000
 ; CHECK-NEXT:         .align  8
 ; CHECK-NEXT: __real@0000000000800000:
 ; CHECK-NEXT:         .quad   8388608
@@ -18,7 +18,7 @@ define <4 x i32> @vec1() {
   ret <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 }
 ; CHECK:              .globl  __xmm@00000000000000010000000200000003
-; CHECK-NEXT:         .section        .rdata,"rd",discard,__xmm@00000000000000010000000200000003
+; CHECK-NEXT:         .section        .rdata,"dr",discard,__xmm@00000000000000010000000200000003
 ; CHECK-NEXT:         .align  16
 ; CHECK-NEXT: __xmm@00000000000000010000000200000003:
 ; CHECK-NEXT:         .long   3
@@ -33,7 +33,7 @@ define <8 x i16> @vec2() {
   ret <8 x i16> <i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
 }
 ; CHECK:             .globl  __xmm@00000001000200030004000500060007
-; CHECK-NEXT:        .section        .rdata,"rd",discard,__xmm@00000001000200030004000500060007
+; CHECK-NEXT:        .section        .rdata,"dr",discard,__xmm@00000001000200030004000500060007
 ; CHECK-NEXT:        .align  16
 ; CHECK-NEXT: __xmm@00000001000200030004000500060007:
 ; CHECK-NEXT:        .short  7
@@ -53,7 +53,7 @@ define <4 x float> @undef1() {
   ret <4 x float> <float 1.0, float 1.0, float undef, float undef>

 ; CHECK:             .globl  __xmm@00000000000000003f8000003f800000
-; CHECK-NEXT:        .section        .rdata,"rd",discard,__xmm@00000000000000003f8000003f800000
+; CHECK-NEXT:        .section        .rdata,"dr",discard,__xmm@00000000000000003f8000003f800000
 ; CHECK-NEXT:        .align  16
 ; CHECK-NEXT: __xmm@00000000000000003f8000003f800000:
 ; CHECK-NEXT:        .long   1065353216              # float 1
diff --git a/test/DebugInfo/COFF/asm.ll b/test/DebugInfo/COFF/asm.ll
index 44ee4f9ce4f4..4d5cdda5659a 100644
--- a/test/DebugInfo/COFF/asm.ll
+++ b/test/DebugInfo/COFF/asm.ll
@@ -22,7 +22,7 @@
 ; X86-NEXT: L{{.*}}:
 ; X86-NEXT: [[END_OF_F:^L.*]]:
 ;
-; X86-LABEL: .section        .debug$S,"rd"
+; X86-LABEL: .section        .debug$S,"dr"
 ; X86-NEXT: .long   4
 ; Symbol subsection
 ; X86-NEXT: .long   241
@@ -127,7 +127,7 @@
 ; X64-NEXT: .L{{.*}}:
 ; X64-NEXT: [[END_OF_F:.*]]:
 ;
-; X64-LABEL: .section        .debug$S,"rd"
+; X64-LABEL: .section        .debug$S,"dr"
 ; X64-NEXT: .long   4
 ; Symbol subsection
 ; X64-NEXT: .long   241
diff --git a/test/DebugInfo/COFF/multifile.ll b/test/DebugInfo/COFF/multifile.ll
index 5cdd6dc2e51b..52a62d1c3ca9 100644
--- a/test/DebugInfo/COFF/multifile.ll
+++ b/test/DebugInfo/COFF/multifile.ll
@@ -29,7 +29,7 @@
 ; X86-NEXT: L{{.*}}:
 ; X86-NEXT: [[END_OF_F:.*]]:
 ;
-; X86-LABEL: .section        .debug$S,"rd"
+; X86-LABEL: .section        .debug$S,"dr"
 ; X86-NEXT: .long   4
 ; Symbol subsection
 ; X86-NEXT: .long   241
@@ -159,7 +159,7 @@
 ; X64-NEXT: .L{{.*}}:
 ; X64-NEXT: [[END_OF_F:.*]]:
 ;
-; X64-LABEL: .section        .debug$S,"rd"
+; X64-LABEL: .section        .debug$S,"dr"
 ; X64-NEXT: .long   4
 ; Symbol subsection
 ; X64-NEXT: .long   241
diff --git a/test/DebugInfo/COFF/multifunction.ll b/test/DebugInfo/COFF/multifunction.ll
index 8f9a3f8b9b75..01ba617dc1d7 100644
--- a/test/DebugInfo/COFF/multifunction.ll
+++ b/test/DebugInfo/COFF/multifunction.ll
@@ -53,7 +53,7 @@
 ; X86-NEXT: L{{.*}}:
 ; X86-NEXT: [[END_OF_F:.*]]:
 ;
-; X86-LABEL: .section        .debug$S,"rd"
+; X86-LABEL: .section        .debug$S,"dr"
 ; X86-NEXT: .long   4
 ; Symbol subsection for x
 ; X86-NEXT: .long   241
@@ -317,7 +317,7 @@
 ; X64-NEXT: .L{{.*}}:
 ; X64-NEXT: [[END_OF_F:.*]]:
 ;
-; X64-LABEL: .section        .debug$S,"rd"
+; X64-LABEL: .section        .debug$S,"dr"
 ; X64-NEXT: .long   4
 ; Symbol subsection for x
 ; X64-NEXT: .long   241
diff --git a/test/DebugInfo/COFF/simple.ll b/test/DebugInfo/COFF/simple.ll
index bcb8a695c7f1..196deefe8c4e 100644
--- a/test/DebugInfo/COFF/simple.ll
+++ b/test/DebugInfo/COFF/simple.ll
@@ -20,7 +20,7 @@
 ; X86-NEXT: L{{.*}}:
 ; X86-NEXT: [[END_OF_F:.*]]:
 ;
-; X86-LABEL: .section        .debug$S,"rd"
+; X86-LABEL: .section        .debug$S,"dr"
 ; X86-NEXT: .long   4
 ; Symbol subsection
 ; X86-NEXT: .long   241
@@ -118,7 +118,7 @@
 ; X64-NEXT: .L{{.*}}:
 ; X64-NEXT: [[END_OF_F:.*]]:
 ;
-; X64-LABEL: .section        .debug$S,"rd"
+; X64-LABEL: .section        .debug$S,"dr"
 ; X64-NEXT: .long   4
 ; Symbol subsection
 ; X64-NEXT: .long   241
diff --git a/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll b/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
index 181359b905ab..83d976d24056 100644
--- a/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
+++ b/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
@@ -22,7 +22,7 @@
 ; X86-NEXT: [[END_OF_BAR:^L.*]]:{{$}}
 ; X86-NOT:  ret

-; X86-LABEL: .section        .debug$S,"rd"
+; X86-LABEL: .section        .debug$S,"dr"
 ; X86:       .secrel32 "?bar@@YAXHZZ"
 ; X86-NEXT:  .secidx   "?bar@@YAXHZZ"
 ; X86:       .long   0
diff --git a/test/DebugInfo/X86/coff_debug_info_type.ll b/test/DebugInfo/X86/coff_debug_info_type.ll
index d34f50b7244b..89859d246af8 100644
--- a/test/DebugInfo/X86/coff_debug_info_type.ll
+++ b/test/DebugInfo/X86/coff_debug_info_type.ll
@@ -6,7 +6,7 @@
 ; CHECK:    .section  .apple_types

 ; RUN: llc -mtriple=i686-pc-win32 -filetype=asm -O0 < %s | FileCheck -check-prefix=WIN32 %s
-; WIN32:    .section .debug$S,"rd"
+; WIN32:    .section .debug$S,"dr"

 ; RUN: llc -mtriple=i686-pc-win32 -filetype=null -O0 < %s
diff --git a/test/DebugInfo/location-verifier.ll b/test/DebugInfo/location-verifier.ll
new file mode 100644
index 000000000000..0e56be42e1f9
--- /dev/null
+++ b/test/DebugInfo/location-verifier.ll
@@ -0,0 +1,33 @@
+; RUN: not llvm-as -disable-output -verify-debug-info < %s 2>&1 | FileCheck %s
+; ModuleID = 'test.c'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @foo() #0 {
+entry:
+  ret i32 42, !dbg !13
+}
+
+attributes #0 = { nounwind ssp uwtable }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10, !11}
+!llvm.ident = !{!12}
+
+!0 = !{!"0x11\0012\00clang version 3.7.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/test.c] [DW_LANG_C99]
+!1 = !{!"test.c", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\000\000\000\001", !1, !5, !6, null, i32 ()* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = !{!"0x29", !1}                               ; [ DW_TAG_file_type ] [/test.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{i32 2, !"Dwarf Version", i32 2}
+!10 = !{i32 2, !"Debug Info Version", i32 2}
+!11 = !{i32 1, !"PIC Level", i32 2}
+!12 = !{!"clang version 3.7.0 "}
+; An old-style MDLocation should not pass verify.
+; CHECK: DISubprogram does not Verify
+!13 = !{i32 2, i32 2, !4, null}
diff --git a/test/Instrumentation/MemorySanitizer/atomics.ll b/test/Instrumentation/MemorySanitizer/atomics.ll
index c8f3b88815bb..28736ad79029 100644
--- a/test/Instrumentation/MemorySanitizer/atomics.ll
+++ b/test/Instrumentation/MemorySanitizer/atomics.ll
@@ -1,4 +1,6 @@
 ; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
+; RUN: opt < %s -msan -msan-check-access-address=0 -msan-track-origins=1 -S | FileCheck %s
+; RUN: opt < %s -msan -msan-check-access-address=0 -msan-track-origins=2 -S | FileCheck %s

 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Linker/distinct-cycles.ll b/test/Linker/distinct-cycles.ll
new file mode 100644
index 000000000000..b9b496c50c14
--- /dev/null
+++ b/test/Linker/distinct-cycles.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-link -o - -S %s | FileCheck %s
+; Crasher for PR22456: MapMetadata() should resolve all cycles.
+
+; CHECK: !named = !{!0}
+!named = !{!0}
+
+; CHECK: !0 = distinct !{!1}
+!0 = distinct !{!1}
+
+; CHECK-NEXT: !1 = !{!2}
+; CHECK-NEXT: !2 = !{!1}
+!1 = !{!2}
+!2 = !{!1}
diff --git a/test/MC/ARM/pr22395-2.s b/test/MC/ARM/pr22395-2.s
new file mode 100644
index 000000000000..3d2a10d6bbbc
--- /dev/null
+++ b/test/MC/ARM/pr22395-2.s
@@ -0,0 +1,37 @@
+@ RUN: llvm-mc -triple armv4t-eabi -mattr +d16 -filetype asm -o - %s 2>&1 | FileCheck %s
+
+	.text
+	.thumb
+
+	.p2align 2
+
+	.fpu vfpv3
+	vldmia r0, {d16-d31}
+@ CHECK: vldmia	r0, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+@ CHECK-NOT: error: register expected
+
+	.fpu vfpv4
+	vldmia r0, {d16-d31}
+@ CHECK: vldmia	r0, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+@ CHECK-NOT: error: register expected
+
+	.fpu neon
+	vldmia r0, {d16-d31}
+@ CHECK: vldmia	r0, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+@ CHECK-NOT: error: register expected
+
+	.fpu neon-vfpv4
+	vldmia r0, {d16-d31}
+@ CHECK: vldmia	r0, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+@ CHECK-NOT: error: register expected
+
+	.fpu neon-fp-armv8
+	vldmia r0, {d16-d31}
+@ CHECK: vldmia	r0, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+@ CHECK-NOT: error: register expected
+
+	.fpu crypto-neon-fp-armv8
+	vldmia r0, {d16-d31}
+@ CHECK: vldmia	r0, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+@ CHECK-NOT: error: register expected
+
diff --git a/test/MC/COFF/bss_section.ll b/test/MC/COFF/bss_section.ll
index 1921eeb61a65..477b3dfbd3a6 100644
--- a/test/MC/COFF/bss_section.ll
+++ b/test/MC/COFF/bss_section.ll
@@ -6,4 +6,4 @@
 ; CHECK: .bss
 @thingy_linkonce = linkonce_odr global %struct.foo zeroinitializer, align 4
-; CHECK: .section .bss,"wb",discard,_thingy_linkonce
+; CHECK: .section .bss,"bw",discard,_thingy_linkonce
diff --git a/test/MC/COFF/const-gv-with-rel-init.ll b/test/MC/COFF/const-gv-with-rel-init.ll
index 7d3c5f631881..5d0460dbaeff 100644
--- a/test/MC/COFF/const-gv-with-rel-init.ll
+++ b/test/MC/COFF/const-gv-with-rel-init.ll
@@ -5,7 +5,7 @@ define void @f() {
 }

 @ptr = constant void ()* @f, section ".CRT$XLB", align 8
-; CHECK:  .section  .CRT$XLB,"rd"
+; CHECK:  .section  .CRT$XLB,"dr"

 @weak_array = weak_odr unnamed_addr constant [1 x i8*] [i8* bitcast (void ()* @f to i8*)]
-; CHECK:  .section  .rdata,"rd",discard,weak_array
+; CHECK:  .section  .rdata,"dr",discard,weak_array
diff --git a/test/MC/COFF/diff.s b/test/MC/COFF/diff.s
index 820272a40bf4..5111600c7449 100644
--- a/test/MC/COFF/diff.s
+++ b/test/MC/COFF/diff.s
@@ -1,5 +1,23 @@
 // RUN: llvm-mc -filetype=obj -triple i686-pc-mingw32 %s | llvm-readobj -s -sr -sd | FileCheck %s

+.section baz, "xr"
+	.def	X
+	.scl	2;
+	.type	32;
+	.endef
+	.globl	X
+X:
+	mov	Y-X+42,	%eax
+	retl
+
+	.def	Y
+	.scl	2;
+	.type	32;
+	.endef
+	.globl	Y
+Y:
+	retl
+
 	.def	 _foobar;
 	.scl	2;
 	.type	32;
@@ -30,3 +48,10 @@ _rust_crate:
 // CHECK:        SectionData (
 // CHECK-NEXT:     0000: 00000000 00000000 1C000000 20000000
 // CHECK-NEXT:   )
+
+// CHECK:        Name: baz
+// CHECK:        Relocations [
+// CHECK-NEXT:   ]
+// CHECK:        SectionData (
+// CHECK-NEXT:     0000: A1300000 00C3C3
+// CHECK-NEXT:   )
diff --git a/test/MC/COFF/global_ctors_dtors.ll b/test/MC/COFF/global_ctors_dtors.ll
index ca17f24a68e0..be92c27c0d8a 100644
--- a/test/MC/COFF/global_ctors_dtors.ll
+++ b/test/MC/COFF/global_ctors_dtors.ll
@@ -49,17 +49,17 @@ define i32 @main() nounwind {
   ret i32 0
 }
-; WIN32: .section .CRT$XCU,"rd"
+; WIN32: .section .CRT$XCU,"dr"
 ; WIN32: a_global_ctor
-; WIN32: .section .CRT$XCU,"rd",associative,{{_?}}b
+; WIN32: .section .CRT$XCU,"dr",associative,{{_?}}b
 ; WIN32: b_global_ctor
 ; WIN32-NOT: c_global_ctor
-; WIN32: .section .CRT$XTX,"rd"
+; WIN32: .section .CRT$XTX,"dr"
 ; WIN32: a_global_dtor
-; MINGW32: .section .ctors,"wd"
+; MINGW32: .section .ctors,"dw"
 ; MINGW32: a_global_ctor
-; MINGW32: .section .ctors,"wd",associative,{{_?}}b
+; MINGW32: .section .ctors,"dw",associative,{{_?}}b
 ; MINGW32: b_global_ctor
 ; MINGW32-NOT: c_global_ctor
-; MINGW32: .section .dtors,"wd"
+; MINGW32: .section .dtors,"dw"
 ; MINGW32: a_global_dtor
diff --git a/test/MC/COFF/initialised-data.ll b/test/MC/COFF/initialised-data.ll
index c4284696b8ca..a2faac748567 100644
--- a/test/MC/COFF/initialised-data.ll
+++ b/test/MC/COFF/initialised-data.ll
@@ -3,5 +3,5 @@

 @data = dllexport constant [5 x i8] c"data\00", align 1

-; CHECK: .section	.rdata,"rd"
+; CHECK: .section	.rdata,"dr"
diff --git a/test/MC/COFF/section-passthru-flags.s b/test/MC/COFF/section-passthru-flags.s
index 3bd061b391d1..96e42d23351e 100644
--- a/test/MC/COFF/section-passthru-flags.s
+++ b/test/MC/COFF/section-passthru-flags.s
@@ -3,5 +3,5 @@
 // CHECK: .section .klaatu,"wn"
 .section .barada,"y"
 // CHECK: .section .barada,"y"
-.section .nikto,"wds"
-// CHECK: .section .nikto,"wds"
+.section .nikto,"dws"
+// CHECK: .section .nikto,"dws"
diff --git a/test/MC/COFF/weak-symbol.ll b/test/MC/COFF/weak-symbol.ll
index fd78307c1f22..0ab860cad462 100644
--- a/test/MC/COFF/weak-symbol.ll
+++ b/test/MC/COFF/weak-symbol.ll
@@ -28,20 +28,20 @@ define weak void @f() section ".sect" {
 }
  ; Weak global
 -; X86: .section .data,"rd",discard,_a
 +; X86: .section .data,"dr",discard,_a
  ; X86: .globl _a
  ; X86: .zero 12
  ;
 -; X64: .section .data,"rd",discard,a
 +; X64: .section .data,"dr",discard,a
  ; X64: .globl a
  ; X64: .zero 12
  @a = weak unnamed_addr constant { i32, i32, i32 } { i32 0, i32 0, i32 0}, section ".data"
 -; X86:  .section        .tls$,"wd",discard,_b
 +; X86:  .section        .tls$,"dw",discard,_b
  ; X86:  .globl  _b
  ; X86:  .long   0
  ;
 -; X64:  .section        .tls$,"wd",discard,b
 +; X64:  .section        .tls$,"dw",discard,b
  ; X64:  .globl  b
  ; X64:  .long   0
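The COFF churn in the test updates above is mechanical rather than functional: judging by the -/+ pairs, MC now prints COFF section flag letters in a fixed canonical order, so "rd" becomes "dr", "wb" becomes "bw", "wd" becomes "dw", and "wds" becomes "dws", while the flag set itself is unchanged. A minimal sketch of what the updated tests expect (hypothetical file and symbol name; only the shape of the directive matters, and the exact placement is an assumption rather than something the diff states):

; RUN: llc -mtriple=i686-pc-win32 < %s | FileCheck %s
; A read-only constant should land in .rdata, with the flags now
; spelled "dr" rather than the old "rd".
@k = unnamed_addr constant i32 7
; CHECK: .section .rdata,"dr"

Every -/+ pair in the weak-symbol.ll hunk above can be read the same way: the two lines differ only in the order of the flag letters.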
diff --git a/test/Transforms/EarlyCSE/basic.ll b/test/Transforms/EarlyCSE/basic.ll
index 155d36f60e21..3ec8831def18 100644
--- a/test/Transforms/EarlyCSE/basic.ll
+++ b/test/Transforms/EarlyCSE/basic.ll
@@ -192,4 +192,13 @@ define void @test11(i32 *%P) {
   ; CHECK-NEXT: ret void
 }
-
+; CHECK-LABEL: @test12(
+define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
+  %load0 = load i32* %P1
+  %1 = load atomic i32* %P2 seq_cst, align 4
+  %load1 = load i32* %P1
+  %sel = select i1 %B, i32 %load0, i32 %load1
+  ret i32 %sel
+  ; CHECK: load i32* %P1
+  ; CHECK: load i32* %P1
+}
diff --git a/test/Transforms/Inline/inline-indirect.ll b/test/Transforms/Inline/inline-indirect.ll
new file mode 100644
index 000000000000..f6eb528e0650
--- /dev/null
+++ b/test/Transforms/Inline/inline-indirect.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -inline -disable-output 2>/dev/null
+; This test used to trigger an assertion in the assumption cache when
+; inlining the indirect call
+declare void @llvm.assume(i1)
+
+define void @foo() {
+  ret void
+}
+
+define void @bar(void ()*) {
+  call void @llvm.assume(i1 true)
+  call void %0();
+  ret void
+}
+
+define void @baz() {
+  call void @bar(void ()* @foo)
+  ret void
+}
diff --git a/test/Transforms/InstCombine/loadstore-metadata.ll b/test/Transforms/InstCombine/loadstore-metadata.ll
index ad6a11cf6eb1..3d18ac0e3344 100644
--- a/test/Transforms/InstCombine/loadstore-metadata.ll
+++ b/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -1,5 +1,7 @@
 ; RUN: opt -instcombine -S < %s | FileCheck %s

+target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
 define i32 @test_load_cast_combine_tbaa(float* %ptr) {
 ; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA.
 ; CHECK-LABEL: @test_load_cast_combine_tbaa(
diff --git a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
index 26b294042d42..f4edf092641f 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
+++ b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
@@ -201,7 +201,7 @@ for.end:                                          ; preds = %for.body
 ;
 ; Currently we have three extra add.w's that keep the store address
 ; live past the next increment because ISEL is unfortunately undoing
-; the store chain. ISEL also fails to convert all but one of the stores to
+; the store chain. ISEL also fails to convert the stores to
 ; post-increment addressing. However, the loads should use
 ; post-increment addressing, no add's or add.w's beyond the three
 ; mentioned. Most importantly, there should be no spills or reloads!
@@ -210,7 +210,7 @@ for.end:                                          ; preds = %for.body
 ; A9: %.lr.ph
 ; A9-NOT: lsl.w
 ; A9-NOT: {{ldr|str|adds|add r}}
-; A9: vst1.8 {{.*}} [r{{[0-9]+}}]!
+; A9: add.w r
 ; A9-NOT: {{ldr|str|adds|add r}}
 ; A9: add.w r
 ; A9-NOT: {{ldr|str|adds|add r}}
diff --git a/test/Transforms/MemCpyOpt/callslot_aa.ll b/test/Transforms/MemCpyOpt/callslot_aa.ll
new file mode 100644
index 000000000000..b6ea129ccfa7
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/callslot_aa.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%T = type { i64, i64 }
+
+define void @test(i8* %src) {
+  %tmp = alloca i8
+  %dst = alloca i8
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 1, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %src, i64 1, i32 8, i1 false), !noalias !2
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %tmp, i64 1, i32 8, i1 false)
+
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
+
+; Check that the noalias for "dst" was removed by checking that the metadata is gone
+; CHECK-NOT: "dst"
+!0 = !{!0}
+!1 = distinct !{!1, !0, !"dst"}
+!2 = distinct !{!1}
diff --git a/test/Transforms/SLPVectorizer/X86/bad_types.ll b/test/Transforms/SLPVectorizer/X86/bad_types.ll
new file mode 100644
index 000000000000..38ed18dad2ac
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/bad_types.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test1(x86_mmx %a, x86_mmx %b, i64* %ptr) {
+; Ensure we can handle x86_mmx values which are primitive and can be bitcast
+; with integer types but can't be put into a vector.
+;
+; CHECK-LABEL: @test1
+; CHECK:         store i64
+; CHECK:         store i64
+; CHECK:         ret void
+entry:
+  %a.cast = bitcast x86_mmx %a to i64
+  %b.cast = bitcast x86_mmx %b to i64
+  %a.and = and i64 %a.cast, 42
+  %b.and = and i64 %b.cast, 42
+  %gep = getelementptr i64* %ptr, i32 1
+  store i64 %a.and, i64* %ptr
+  store i64 %b.and, i64* %gep
+  ret void
+}
+
+define void @test2(x86_mmx %a, x86_mmx %b) {
+; Same as @test1 but using phi-input vectorization instead of store
+; vectorization.
+;
+; CHECK-LABEL: @test2
+; CHECK:         and i64
+; CHECK:         and i64
+; CHECK:         ret void
+entry:
+  br i1 undef, label %if.then, label %exit
+
+if.then:
+  %a.cast = bitcast x86_mmx %a to i64
+  %b.cast = bitcast x86_mmx %b to i64
+  %a.and = and i64 %a.cast, 42
+  %b.and = and i64 %b.cast, 42
+  br label %exit
+
+exit:
+  %a.phi = phi i64 [ 0, %entry ], [ %a.and, %if.then ]
+  %b.phi = phi i64 [ 0, %entry ], [ %b.and, %if.then ]
+  tail call void @f(i64 %a.phi, i64 %b.phi)
+  ret void
+}
+
+declare void @f(i64, i64)
diff --git a/test/Transforms/Util/combine-alias-scope-metadata.ll b/test/Transforms/Util/combine-alias-scope-metadata.ll
new file mode 100644
index 000000000000..fd0a3d5c5b92
--- /dev/null
+++ b/test/Transforms/Util/combine-alias-scope-metadata.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @test(i8* noalias dereferenceable(1) %in, i8* noalias dereferenceable(1) %out) {
+  %tmp = alloca i8
+  %tmp2 = alloca i8
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 1, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %in, i64 1, i32 8, i1 false), !alias.scope !4
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* %tmp, i64 1, i32 8, i1 false), !alias.scope !5
+
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %tmp2, i64 1, i32 8, i1 false), !noalias !6
+
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
+
+!0 = !{!0}
+!1 = distinct !{!1, !0, !"in"}
+!2 = distinct !{!2, !0, !"tmp"}
+!3 = distinct !{!3, !0, !"tmp2"}
+!4 = distinct !{!1, !2}
+!5 = distinct !{!2, !3}
+!6 = distinct !{!1, !2}
diff --git a/test/tools/gold/no-map-whole-file.ll b/test/tools/gold/no-map-whole-file.ll
new file mode 100644
index 000000000000..21a0c46d28b0
--- /dev/null
+++ b/test/tools/gold/no-map-whole-file.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as -o %t.bc %s
+; RUN: ld -plugin %llvmshlibdir/LLVMgold.so -plugin-opt=emit-llvm \
+; RUN:    --no-map-whole-files -r -o %t2.bc %t.bc
+; RUN: llvm-dis < %t2.bc -o - | FileCheck %s
+
+; CHECK: main
+define i32 @main() {
+  ret i32 0
+}
diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp
index 5524bb9922c5..a9909a721c1b 100644
--- a/tools/gold/gold-plugin.cpp
+++ b/tools/gold/gold-plugin.cpp
@@ -559,11 +559,9 @@ static void freeSymName(ld_plugin_symbol &Sym) {
 }

 static std::unique_ptr<Module>
-getModuleForFile(LLVMContext &Context, claimed_file &F, raw_fd_ostream *ApiFile,
+getModuleForFile(LLVMContext &Context, claimed_file &F,
+                 off_t Filesize, raw_fd_ostream *ApiFile,
                  StringSet<> &Internalize, StringSet<> &Maybe) {
-  ld_plugin_input_file File;
-  if (get_input_file(F.handle, &File) != LDPS_OK)
-    message(LDPL_FATAL, "Failed to get file information");
   if (get_symbols(F.handle, F.syms.size(), &F.syms[0]) != LDPS_OK)
     message(LDPL_FATAL, "Failed to get symbol information");

@@ -572,7 +570,7 @@ getModuleForFile(LLVMContext &Context, claimed_file &F, raw_fd_ostream *ApiFile,
   if (get_view(F.handle, &View) != LDPS_OK)
     message(LDPL_FATAL, "Failed to get a view of file");

-  MemoryBufferRef BufferRef(StringRef((const char *)View, File.filesize), "");
+  MemoryBufferRef BufferRef(StringRef((const char *)View, Filesize), "");

   ErrorOr<std::unique_ptr<object::IRObjectFile>> ObjOrErr =
       object::IRObjectFile::create(BufferRef, Context);
@@ -580,9 +578,6 @@ getModuleForFile(LLVMContext &Context, claimed_file &F, raw_fd_ostream *ApiFile,
     message(LDPL_FATAL, "Could not read bitcode from file : %s",
             EC.message().c_str());

-  if (release_input_file(F.handle) != LDPS_OK)
-    message(LDPL_FATAL, "Failed to release file information");
-
   object::IRObjectFile &Obj = **ObjOrErr;

   Module &M = Obj.getModule();
@@ -798,8 +793,12 @@ static ld_plugin_status allSymbolsReadHook(raw_fd_ostream *ApiFile) {
   StringSet<> Internalize;
   StringSet<> Maybe;
   for (claimed_file &F : Modules) {
+    ld_plugin_input_file File;
+    if (get_input_file(F.handle, &File) != LDPS_OK)
+      message(LDPL_FATAL, "Failed to get file information");
     std::unique_ptr<Module> M =
-        getModuleForFile(Context, F, ApiFile, Internalize, Maybe);
+        getModuleForFile(Context, F, File.filesize, ApiFile,
+                         Internalize, Maybe);
     if (!options::triple.empty())
       M->setTargetTriple(options::triple.c_str());
     else if (M->getTargetTriple().empty()) {
@@ -808,6 +807,8 @@ static ld_plugin_status allSymbolsReadHook(raw_fd_ostream *ApiFile) {
     if (L.linkInModule(M.get()))
       message(LDPL_FATAL, "Failed to link module");
+    if (release_input_file(F.handle) != LDPS_OK)
+      message(LDPL_FATAL, "Failed to release file information");
   }

   for (const auto &Name : Internalize) {
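The gold-plugin hunks above fix an input-file lifetime bug: getModuleForFile used to call get_input_file and release_input_file itself, so the underlying file view could be released while the lazily materialized module still needed it during linking. Both calls now live in the caller's loop in allSymbolsReadHook. A condensed C++ sketch of the resulting control flow, simplified from the hunks above (error paths and unrelated details omitted, so this is an illustration rather than the literal plugin code):

// Acquire the file, build the module from its view, link it, and only
// then release the underlying file.
for (claimed_file &F : Modules) {
  ld_plugin_input_file File;
  if (get_input_file(F.handle, &File) != LDPS_OK)
    message(LDPL_FATAL, "Failed to get file information");
  // The module may still be lazily materialized from the file's view,
  // so File must stay acquired until linking has completed.
  std::unique_ptr<Module> M = getModuleForFile(
      Context, F, File.filesize, ApiFile, Internalize, Maybe);
  if (L.linkInModule(M.get()))
    message(LDPL_FATAL, "Failed to link module");
  // Releasing here, after linkInModule, is the whole point of the change.
  if (release_input_file(F.handle) != LDPS_OK)
    message(LDPL_FATAL, "Failed to release file information");
}

Passing File.filesize into getModuleForFile is what lets the callee stop owning the ld_plugin_input_file handle in the first place.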