| author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:10:56 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:10:56 +0000 |
| commit | 044eb2f6afba375a914ac9d8024f8f5142bb912e | |
| tree | 1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /lib/Target/WebAssembly | |
| parent | eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b | |
Diffstat (limited to 'lib/Target/WebAssembly')
47 files changed, 1117 insertions, 528 deletions
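One hunk worth highlighting before the raw diff is the WebAssemblyMCCodeEmitter change that allows multi-byte opcodes, which the new atomic instructions (0xfe10 and up) need. Below is a minimal standalone sketch of that emission logic; `emitOpcode` and the plain byte vector are illustrative stand-ins for the real `encodeInstruction()` and its MC output stream.

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Sketch of the opcode-emission hunk in WebAssemblyMCCodeEmitter.cpp below:
// opcodes that fit in one byte are written directly, while two-byte opcodes
// such as the new atomics (0xfe10-0xfe16) are written high byte first.
static void emitOpcode(std::vector<uint8_t> &Out, uint64_t Binary) {
  if (Binary <= UINT8_MAX) {
    Out.push_back(uint8_t(Binary));
  } else {
    assert(Binary <= UINT16_MAX && "Several-byte opcodes not supported yet");
    Out.push_back(uint8_t(Binary >> 8)); // prefix byte, e.g. 0xfe for atomics
    Out.push_back(uint8_t(Binary));      // sub-opcode, e.g. 0x10 for i32.atomic.load
  }
}
```

In the actual patch the bytes go to the streamer's output and anything wider than two bytes still asserts, matching the "Several-byte opcodes not supported yet" message in the hunk.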
diff --git a/lib/Target/WebAssembly/CMakeLists.txt b/lib/Target/WebAssembly/CMakeLists.txt index 78b2cdb61b76..68b68bd797b5 100644 --- a/lib/Target/WebAssembly/CMakeLists.txt +++ b/lib/Target/WebAssembly/CMakeLists.txt @@ -25,6 +25,7 @@ add_llvm_target(WebAssemblyCodeGen WebAssemblyInstrInfo.cpp WebAssemblyLowerBrUnless.cpp WebAssemblyLowerEmscriptenEHSjLj.cpp + WebAssemblyLowerGlobalDtors.cpp WebAssemblyMachineFunctionInfo.cpp WebAssemblyMCInstLower.cpp WebAssemblyOptimizeLiveIntervals.cpp diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp index f31dde0ce48f..c3f0f2787146 100644 --- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp +++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp @@ -18,6 +18,7 @@ #include "WebAssemblyMachineFunctionInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" @@ -25,7 +26,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; #define DEBUG_TYPE "asm-printer" diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp index 1357cb5735f8..226a3b35f2cf 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -41,7 +41,8 @@ public: const MCValue &Target, MutableArrayRef<char> Data, uint64_t Value, bool IsPCRel) const override; - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; + std::unique_ptr<MCObjectWriter> + createObjectWriter(raw_pwrite_stream &OS) const override; // No instruction requires relaxation bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, @@ -82,7 +83,8 @@ public: const MCValue &Target, MutableArrayRef<char> Data, uint64_t Value, bool IsPCRel) const override; - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; + std::unique_ptr<MCObjectWriter> + createObjectWriter(raw_pwrite_stream &OS) const override; // No instruction requires relaxation bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, @@ -131,7 +133,7 @@ void WebAssemblyAsmBackendELF::applyFixup(const MCAssembler &Asm, Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); } -MCObjectWriter * +std::unique_ptr<MCObjectWriter> WebAssemblyAsmBackendELF::createObjectWriter(raw_pwrite_stream &OS) const { return createWebAssemblyELFObjectWriter(OS, Is64Bit, 0); } @@ -191,7 +193,7 @@ void WebAssemblyAsmBackend::applyFixup(const MCAssembler &Asm, Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); } -MCObjectWriter * +std::unique_ptr<MCObjectWriter> WebAssemblyAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { return createWebAssemblyWasmObjectWriter(OS, Is64Bit); } diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp index 2146f67959b8..b67ecfa455b3 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp @@ -16,6 +16,7 @@ #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" +#include 
"llvm/MC/MCObjectWriter.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -58,10 +59,10 @@ unsigned WebAssemblyELFObjectWriter::getRelocType(MCContext &Ctx, } } -MCObjectWriter *llvm::createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS, - bool Is64Bit, - uint8_t OSABI) { - MCELFObjectTargetWriter *MOTW = - new WebAssemblyELFObjectWriter(Is64Bit, OSABI); - return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); +std::unique_ptr<MCObjectWriter> +llvm::createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, + uint8_t OSABI) { + auto MOTW = llvm::make_unique<WebAssemblyELFObjectWriter>(Is64Bit, OSABI); + return createELFObjectWriter(std::move(MOTW), OS, /*IsLittleEndian=*/true); } diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp index 3e3b52fca569..77744e53d62f 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -61,8 +61,13 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( uint64_t Start = OS.tell(); uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); - assert(Binary < UINT8_MAX && "Multi-byte opcodes not supported yet"); - OS << uint8_t(Binary); + if (Binary <= UINT8_MAX) { + OS << uint8_t(Binary); + } else { + assert(Binary <= UINT16_MAX && "Several-byte opcodes not supported yet"); + OS << uint8_t(Binary >> 8) + << uint8_t(Binary); + } // For br_table instructions, encode the size of the table. In the MCInst, // there's an index operand, one operand for each table entry, and the @@ -116,10 +121,9 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( } else if (MO.isExpr()) { const MCOperandInfo &Info = Desc.OpInfo[i]; llvm::MCFixupKind FixupKind; - size_t PaddedSize; + size_t PaddedSize = 5; if (Info.OperandType == WebAssembly::OPERAND_I32IMM) { FixupKind = MCFixupKind(WebAssembly::fixup_code_sleb128_i32); - PaddedSize = 5; } else if (Info.OperandType == WebAssembly::OPERAND_I64IMM) { FixupKind = MCFixupKind(WebAssembly::fixup_code_sleb128_i64); PaddedSize = 10; @@ -127,10 +131,8 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( Info.OperandType == WebAssembly::OPERAND_OFFSET32 || Info.OperandType == WebAssembly::OPERAND_TYPEINDEX) { FixupKind = MCFixupKind(WebAssembly::fixup_code_uleb128_i32); - PaddedSize = 5; } else if (Info.OperandType == WebAssembly::OPERAND_GLOBAL) { FixupKind = MCFixupKind(WebAssembly::fixup_code_global_index); - PaddedSize = 5; } else { llvm_unreachable("unexpected symbolic operand kind"); } @@ -138,7 +140,7 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( OS.tell() - Start, MO.getExpr(), FixupKind, MI.getLoc())); ++MCNumFixups; - encodeULEB128(0, OS, PaddedSize - 1); + encodeULEB128(0, OS, PaddedSize); } else { llvm_unreachable("unexpected operand kind"); } diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp index 9580eeaa33d7..18de4273d1d0 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -41,15 +41,6 @@ static MCAsmInfo *createMCAsmInfo(const MCRegisterInfo & /*MRI*/, return new WebAssemblyMCAsmInfo(TT); } -static void adjustCodeGenOpts(const Triple & /*TT*/, Reloc::Model /*RM*/, - CodeModel::Model &CM) { - CodeModel::Model M = (CM == CodeModel::Default || CM == CodeModel::JITDefault) - ? 
CodeModel::Large - : CM; - if (M != CodeModel::Large) - report_fatal_error("Non-large code models are not supported yet"); -} - static MCInstrInfo *createMCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitWebAssemblyMCInstrInfo(X); @@ -115,9 +106,6 @@ extern "C" void LLVMInitializeWebAssemblyTargetMC() { // Register the MC instruction info. TargetRegistry::RegisterMCInstrInfo(*T, createMCInstrInfo); - // Register the MC codegen info. - TargetRegistry::registerMCAdjustCodeGenOpts(*T, adjustCodeGenOpts); - // Register the MC register info. TargetRegistry::RegisterMCRegInfo(*T, createMCRegisterInfo); diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 4d676c32a09c..7dca89ab822d 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -18,6 +18,7 @@ #include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/DataTypes.h" +#include <memory> namespace llvm { @@ -39,11 +40,13 @@ MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII); MCAsmBackend *createWebAssemblyAsmBackend(const Triple &TT); -MCObjectWriter *createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS, - bool Is64Bit, uint8_t OSABI); +std::unique_ptr<MCObjectWriter> +createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, uint8_t OSABI); -MCObjectWriter *createWebAssemblyWasmObjectWriter(raw_pwrite_stream &OS, - bool Is64Bit); +std::unique_ptr<MCObjectWriter> +createWebAssemblyWasmObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit); namespace WebAssembly { enum OperandType { @@ -111,6 +114,8 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) { case WebAssembly::LOAD8_U_I32: case WebAssembly::LOAD8_S_I64: case WebAssembly::LOAD8_U_I64: + case WebAssembly::ATOMIC_LOAD8_U_I32: + case WebAssembly::ATOMIC_LOAD8_U_I64: case WebAssembly::STORE8_I32: case WebAssembly::STORE8_I64: return 0; @@ -118,6 +123,8 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) { case WebAssembly::LOAD16_U_I32: case WebAssembly::LOAD16_S_I64: case WebAssembly::LOAD16_U_I64: + case WebAssembly::ATOMIC_LOAD16_U_I32: + case WebAssembly::ATOMIC_LOAD16_U_I64: case WebAssembly::STORE16_I32: case WebAssembly::STORE16_I64: return 1; @@ -128,11 +135,14 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) { case WebAssembly::LOAD32_S_I64: case WebAssembly::LOAD32_U_I64: case WebAssembly::STORE32_I64: + case WebAssembly::ATOMIC_LOAD_I32: + case WebAssembly::ATOMIC_LOAD32_U_I64: return 2; case WebAssembly::LOAD_I64: case WebAssembly::LOAD_F64: case WebAssembly::STORE_I64: case WebAssembly::STORE_F64: + case WebAssembly::ATOMIC_LOAD_I64: return 3; default: llvm_unreachable("Only loads and stores have p2align values"); diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp index 00bf02469bdd..0ca52ad651b5 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp @@ -108,10 +108,6 @@ void WebAssemblyTargetAsmStreamer::emitGlobal( } } -void WebAssemblyTargetAsmStreamer::emitStackPointer(uint32_t Index) { - OS << "\t.stack_pointer\t" << Index << '\n'; -} - void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; } void WebAssemblyTargetAsmStreamer::emitIndirectFunctionType( @@ -157,11 +153,6 @@ void 
WebAssemblyTargetELFStreamer::emitGlobal( llvm_unreachable(".globalvar encoding not yet implemented"); } -void WebAssemblyTargetELFStreamer::emitStackPointer( - uint32_t Index) { - llvm_unreachable(".stack_pointer encoding not yet implemented"); -} - void WebAssemblyTargetELFStreamer::emitEndFunc() { Streamer.EmitIntValue(WebAssembly::End, 1); } @@ -219,8 +210,8 @@ void WebAssemblyTargetWasmStreamer::emitGlobal( // section. This will later be decoded and turned into contents for the // Globals Section. Streamer.PushSection(); - Streamer.SwitchSection(Streamer.getContext() - .getWasmSection(".global_variables", 0, 0)); + Streamer.SwitchSection(Streamer.getContext().getWasmSection( + ".global_variables", SectionKind::getMetadata())); for (const wasm::Global &G : Globals) { Streamer.EmitIntValue(int32_t(G.Type), 1); Streamer.EmitIntValue(G.Mutable, 1); @@ -238,14 +229,6 @@ void WebAssemblyTargetWasmStreamer::emitGlobal( Streamer.PopSection(); } -void WebAssemblyTargetWasmStreamer::emitStackPointer(uint32_t Index) { - Streamer.PushSection(); - Streamer.SwitchSection(Streamer.getContext() - .getWasmSection(".stack_pointer", 0, 0)); - Streamer.EmitIntValue(Index, 4); - Streamer.PopSection(); -} - void WebAssemblyTargetWasmStreamer::emitEndFunc() { llvm_unreachable(".end_func is not needed for direct wasm output"); } @@ -277,4 +260,5 @@ void WebAssemblyTargetWasmStreamer::emitIndirectFunctionType( } void WebAssemblyTargetWasmStreamer::emitGlobalImport(StringRef name) { + llvm_unreachable(".global_import is not needed for direct wasm output"); } diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h index 102d7219a1e7..2cb21a20580b 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h @@ -39,8 +39,6 @@ public: virtual void emitLocal(ArrayRef<MVT> Types) = 0; /// .globalvar virtual void emitGlobal(ArrayRef<wasm::Global> Globals) = 0; - /// .stack_pointer - virtual void emitStackPointer(uint32_t Index) = 0; /// .endfunc virtual void emitEndFunc() = 0; /// .functype @@ -67,7 +65,6 @@ public: void emitResult(MCSymbol *Symbol, ArrayRef<MVT> Types) override; void emitLocal(ArrayRef<MVT> Types) override; void emitGlobal(ArrayRef<wasm::Global> Globals) override; - void emitStackPointer(uint32_t Index) override; void emitEndFunc() override; void emitIndirectFunctionType(MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, @@ -85,7 +82,6 @@ public: void emitResult(MCSymbol *Symbol, ArrayRef<MVT> Types) override; void emitLocal(ArrayRef<MVT> Types) override; void emitGlobal(ArrayRef<wasm::Global> Globals) override; - void emitStackPointer(uint32_t Index) override; void emitEndFunc() override; void emitIndirectFunctionType(MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, @@ -103,7 +99,6 @@ public: void emitResult(MCSymbol *Symbol, ArrayRef<MVT> Types) override; void emitLocal(ArrayRef<MVT> Types) override; void emitGlobal(ArrayRef<wasm::Global> Globals) override; - void emitStackPointer(uint32_t Index) override; void emitEndFunc() override; void emitIndirectFunctionType(MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp index 9cf77829f3bc..39abde26df7f 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp +++ 
b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSymbolWasm.h" #include "llvm/MC/MCWasmObjectWriter.h" #include "llvm/MC/MCValue.h" @@ -73,7 +74,7 @@ WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target, case WebAssembly::fixup_code_sleb128_i32: if (IsFunction) return wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB; - return wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB; + return wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB; case WebAssembly::fixup_code_sleb128_i64: llvm_unreachable("fixup_sleb128_i64 not implemented yet"); case WebAssembly::fixup_code_uleb128_i32: @@ -81,11 +82,11 @@ WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target, return wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB; if (IsFunction) return wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB; - return wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB; + return wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB; case FK_Data_4: if (IsFunction) return wasm::R_WEBASSEMBLY_TABLE_INDEX_I32; - return wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32; + return wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32; case FK_Data_8: llvm_unreachable("FK_Data_8 not implemented yet"); default: @@ -93,8 +94,9 @@ WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target, } } -MCObjectWriter *llvm::createWebAssemblyWasmObjectWriter(raw_pwrite_stream &OS, - bool Is64Bit) { - MCWasmObjectTargetWriter *MOTW = new WebAssemblyWasmObjectWriter(Is64Bit); - return createWasmObjectWriter(MOTW, OS); +std::unique_ptr<MCObjectWriter> +llvm::createWebAssemblyWasmObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit) { + auto MOTW = llvm::make_unique<WebAssemblyWasmObjectWriter>(Is64Bit); + return createWasmObjectWriter(std::move(MOTW), OS); } diff --git a/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp b/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp index f310f0a44461..a2c03b1a0400 100644 --- a/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp +++ b/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp @@ -30,7 +30,7 @@ Target &llvm::getTheWebAssemblyTarget64() { extern "C" void LLVMInitializeWebAssemblyTargetInfo() { RegisterTarget<Triple::wasm32> X(getTheWebAssemblyTarget32(), "wasm32", - "WebAssembly 32-bit"); + "WebAssembly 32-bit", "WebAssembly"); RegisterTarget<Triple::wasm64> Y(getTheWebAssemblyTarget64(), "wasm64", - "WebAssembly 64-bit"); + "WebAssembly 64-bit", "WebAssembly"); } diff --git a/lib/Target/WebAssembly/WebAssembly.h b/lib/Target/WebAssembly/WebAssembly.h index e04c4db19c8c..7ac6c3991531 100644 --- a/lib/Target/WebAssembly/WebAssembly.h +++ b/lib/Target/WebAssembly/WebAssembly.h @@ -28,6 +28,7 @@ class FunctionPass; // LLVM IR passes. 
ModulePass *createWebAssemblyLowerEmscriptenEHSjLj(bool DoEH, bool DoSjLj); void initializeWebAssemblyLowerEmscriptenEHSjLjPass(PassRegistry &); +ModulePass *createWebAssemblyLowerGlobalDtors(); ModulePass *createWebAssemblyFixFunctionBitcasts(); FunctionPass *createWebAssemblyOptimizeReturned(); diff --git a/lib/Target/WebAssembly/WebAssembly.td b/lib/Target/WebAssembly/WebAssembly.td index f647349d759b..99cf1f119a20 100644 --- a/lib/Target/WebAssembly/WebAssembly.td +++ b/lib/Target/WebAssembly/WebAssembly.td @@ -25,6 +25,12 @@ include "llvm/Target/Target.td" def FeatureSIMD128 : SubtargetFeature<"simd128", "HasSIMD128", "true", "Enable 128-bit SIMD">; +def FeatureAtomics : SubtargetFeature<"atomics", "HasAtomics", "true", + "Enable Atomics">; +def FeatureNontrappingFPToInt : + SubtargetFeature<"nontrapping-fptoint", + "HasNontrappingFPToInt", "true", + "Enable non-trapping float-to-int conversion operators">; //===----------------------------------------------------------------------===// // Architectures. @@ -55,7 +61,8 @@ def : ProcessorModel<"mvp", NoSchedModel, []>; def : ProcessorModel<"generic", NoSchedModel, []>; // Latest and greatest experimental version of WebAssembly. Bugs included! -def : ProcessorModel<"bleeding-edge", NoSchedModel, [FeatureSIMD128]>; +def : ProcessorModel<"bleeding-edge", NoSchedModel, + [FeatureSIMD128, FeatureAtomics]>; //===----------------------------------------------------------------------===// // Target Declaration diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index 211358ad66cd..d19463ccb51f 100644 --- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -90,10 +90,13 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) { } for (const auto &G : M.globals()) { if (!G.hasInitializer() && G.hasExternalLinkage()) { - uint16_t Size = M.getDataLayout().getTypeAllocSize(G.getValueType()); - getTargetStreamer()->emitGlobalImport(G.getGlobalIdentifier()); - OutStreamer->emitELFSize(getSymbol(&G), - MCConstantExpr::create(Size, OutContext)); + if (G.getValueType()->isSized()) { + uint16_t Size = M.getDataLayout().getTypeAllocSize(G.getValueType()); + if (TM.getTargetTriple().isOSBinFormatELF()) + getTargetStreamer()->emitGlobalImport(G.getGlobalIdentifier()); + OutStreamer->emitELFSize(getSymbol(&G), + MCConstantExpr::create(Size, OutContext)); + } } } } @@ -111,7 +114,7 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() { getTargetStreamer()->emitParam(CurrentFnSym, MFI->getParams()); SmallVector<MVT, 4> ResultVTs; - const Function &F(*MF->getFunction()); + const Function &F = MF->getFunction(); // Emit the function index. if (MDNode *Idx = F.getMetadata("wasm.index")) { @@ -267,12 +270,11 @@ bool WebAssemblyAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, if (AsmVariant != 0) report_fatal_error("There are no defined alternate asm variants"); - if (!ExtraCode) { - // TODO: For now, we just hard-code 0 as the constant offset; teach - // SelectInlineAsmMemoryOperand how to do address mode matching. - OS << "0(" + regToString(MI->getOperand(OpNo)) + ')'; - return false; - } + // The current approach to inline asm is that "r" constraints are expressed + // as local indices, rather than values on the operand stack. This simplifies + // using "r" as it eliminates the need to push and pop the values in a + // particular order, however it also makes it impossible to have an "m" + // constraint. So we don't support it. 
return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, AsmVariant, ExtraCode, OS); } diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h index c8917b8d7e48..a37f8bcf6ba5 100644 --- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h +++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h @@ -10,6 +10,7 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYASMPRINTER_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYASMPRINTER_H +#include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/MC/MCStreamer.h" @@ -17,7 +18,6 @@ namespace llvm { class MCSymbol; -class WebAssemblyFunctionInfo; class WebAssemblyTargetStreamer; class WebAssemblyMCInstLower; diff --git a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp index b2330a232093..1af92f02d8e0 100644 --- a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp +++ b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp @@ -27,7 +27,7 @@ #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp index 41249117ae0e..e2edb924d4d2 100644 --- a/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp +++ b/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp @@ -294,6 +294,17 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { unsigned OldReg = MO.getReg(); + // Inline asm may have a def in the middle of the operands. Our contract + // with inline asm register operands is to provide local indices as + // immediates. + if (MO.isDef()) { + assert(MI.getOpcode() == TargetOpcode::INLINEASM); + unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); + MRI.removeRegOperandFromUseList(&MO); + MO = MachineOperand::CreateImm(LocalId); + continue; + } + // If we see a stackified register, prepare to insert subsequent // get_locals before the start of its tree. if (MFI.isVRegStackified(OldReg)) { @@ -301,6 +312,15 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { continue; } + // Our contract with inline asm register operands is to provide local + // indices as immediates. + if (MI.getOpcode() == TargetOpcode::INLINEASM) { + unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); + MRI.removeRegOperandFromUseList(&MO); + MO = MachineOperand::CreateImm(LocalId); + continue; + } + // Insert a get_local. unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); const TargetRegisterClass *RC = MRI.getRegClass(OldReg); diff --git a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index c980f4b87f91..7e284ea950fd 100644 --- a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -275,7 +275,10 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) { } if (S == 1 && Addr.isRegBase() && Addr.getReg() == 0) { // An unscaled add of a register. Set it as the new base. 
- Addr.setReg(getRegForValue(Op)); + unsigned Reg = getRegForValue(Op); + if (Reg == 0) + return false; + Addr.setReg(Reg); break; } if (canFoldAddIntoGEP(U, Op)) { @@ -359,7 +362,10 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) { if (Addr.isSet()) { return false; } - Addr.setReg(getRegForValue(Obj)); + unsigned Reg = getRegForValue(Obj); + if (Reg == 0) + return false; + Addr.setReg(Reg); return Addr.getReg() != 0; } @@ -418,7 +424,10 @@ unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V, bool &Not) { } Not = false; - return maskI1Value(getRegForValue(V), V); + unsigned Reg = getRegForValue(V); + if (Reg == 0) + return 0; + return maskI1Value(Reg, V); } unsigned WebAssemblyFastISel::zeroExtendToI32(unsigned Reg, const Value *V, @@ -535,13 +544,19 @@ unsigned WebAssemblyFastISel::signExtend(unsigned Reg, const Value *V, unsigned WebAssemblyFastISel::getRegForUnsignedValue(const Value *V) { MVT::SimpleValueType From = getSimpleType(V->getType()); MVT::SimpleValueType To = getLegalType(From); - return zeroExtend(getRegForValue(V), V, From, To); + unsigned VReg = getRegForValue(V); + if (VReg == 0) + return 0; + return zeroExtend(VReg, V, From, To); } unsigned WebAssemblyFastISel::getRegForSignedValue(const Value *V) { MVT::SimpleValueType From = getSimpleType(V->getType()); MVT::SimpleValueType To = getLegalType(From); - return zeroExtend(getRegForValue(V), V, From, To); + unsigned VReg = getRegForValue(V); + if (VReg == 0) + return 0; + return signExtend(VReg, V, From, To); } unsigned WebAssemblyFastISel::getRegForPromotedValue(const Value *V, @@ -700,9 +715,12 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { if (Func && Func->isIntrinsic()) return false; + bool IsDirect = Func != nullptr; + if (!IsDirect && isa<ConstantExpr>(Call->getCalledValue())) + return false; + FunctionType *FuncTy = Call->getFunctionType(); unsigned Opc; - bool IsDirect = Func != nullptr; bool IsVoid = FuncTy->getReturnType()->isVoidTy(); unsigned ResultReg; if (IsVoid) { @@ -794,8 +812,12 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { if (IsDirect) MIB.addGlobalAddress(Func); - else - MIB.addReg(getRegForValue(Call->getCalledValue())); + else { + unsigned Reg = getRegForValue(Call->getCalledValue()); + if (Reg == 0) + return false; + MIB.addReg(Reg); + } for (unsigned ArgReg : Args) MIB.addReg(ArgReg); @@ -885,7 +907,10 @@ bool WebAssemblyFastISel::selectZExt(const Instruction *I) { const Value *Op = ZExt->getOperand(0); MVT::SimpleValueType From = getSimpleType(Op->getType()); MVT::SimpleValueType To = getLegalType(getSimpleType(ZExt->getType())); - unsigned Reg = zeroExtend(getRegForValue(Op), Op, From, To); + unsigned In = getRegForValue(Op); + if (In == 0) + return false; + unsigned Reg = zeroExtend(In, Op, From, To); if (Reg == 0) return false; @@ -899,7 +924,10 @@ bool WebAssemblyFastISel::selectSExt(const Instruction *I) { const Value *Op = SExt->getOperand(0); MVT::SimpleValueType From = getSimpleType(Op->getType()); MVT::SimpleValueType To = getLegalType(getSimpleType(SExt->getType())); - unsigned Reg = signExtend(getRegForValue(Op), Op, From, To); + unsigned In = getRegForValue(Op); + if (In == 0) + return false; + unsigned Reg = signExtend(In, Op, From, To); if (Reg == 0) return false; @@ -1041,15 +1069,18 @@ bool WebAssemblyFastISel::selectBitCast(const Instruction *I) { if (!VT.isSimple() || !RetVT.isSimple()) return false; + unsigned In = getRegForValue(I->getOperand(0)); + if (In == 0) + return false; + if (VT == RetVT) { // 
No-op bitcast. - updateValueMap(I, getRegForValue(I->getOperand(0))); + updateValueMap(I, In); return true; } unsigned Reg = fastEmit_ISD_BITCAST_r(VT.getSimpleVT(), RetVT.getSimpleVT(), - getRegForValue(I->getOperand(0)), - I->getOperand(0)->hasOneUse()); + In, I->getOperand(0)->hasOneUse()); if (!Reg) return false; MachineBasicBlock::iterator Iter = FuncInfo.InsertPt; diff --git a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp index 76a2ff3f9803..666337acccce 100644 --- a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp @@ -24,6 +24,7 @@ //===----------------------------------------------------------------------===// #include "WebAssembly.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -35,6 +36,11 @@ using namespace llvm; #define DEBUG_TYPE "wasm-fix-function-bitcasts" +static cl::opt<bool> TemporaryWorkarounds( + "wasm-temporary-workarounds", + cl::desc("Apply certain temporary workarounds"), + cl::init(true), cl::Hidden); + namespace { class FixFunctionBitcasts final : public ModulePass { StringRef getPassName() const override { @@ -68,10 +74,19 @@ static void FindUses(Value *V, Function &F, if (BitCastOperator *BC = dyn_cast<BitCastOperator>(U.getUser())) FindUses(BC, F, Uses, ConstantBCs); else if (U.get()->getType() != F.getType()) { + CallSite CS(U.getUser()); + if (!CS) + // Skip uses that aren't immediately called + continue; + Value *Callee = CS.getCalledValue(); + if (Callee != V) + // Skip calls where the function isn't the callee + continue; if (isa<Constant>(U.get())) { // Only add constant bitcasts to the list once; they get RAUW'd auto c = ConstantBCs.insert(cast<Constant>(U.get())); - if (!c.second) continue; + if (!c.second) + continue; } Uses.push_back(std::make_pair(&U, &F)); } @@ -97,9 +112,10 @@ static Function *CreateWrapper(Function *F, FunctionType *Ty) { // Determine what arguments to pass. SmallVector<Value *, 4> Args; Function::arg_iterator AI = Wrapper->arg_begin(); + Function::arg_iterator AE = Wrapper->arg_end(); FunctionType::param_iterator PI = F->getFunctionType()->param_begin(); FunctionType::param_iterator PE = F->getFunctionType()->param_end(); - for (; AI != Wrapper->arg_end() && PI != PE; ++AI, ++PI) { + for (; AI != AE && PI != PE; ++AI, ++PI) { if (AI->getType() != *PI) { Wrapper->eraseFromParent(); return nullptr; @@ -108,6 +124,9 @@ static Function *CreateWrapper(Function *F, FunctionType *Ty) { } for (; PI != PE; ++PI) Args.push_back(UndefValue::get(*PI)); + if (F->isVarArg()) + for (; AI != AE; ++AI) + Args.push_back(&*AI); CallInst *Call = CallInst::Create(F, Args, "", BB); @@ -128,11 +147,41 @@ static Function *CreateWrapper(Function *F, FunctionType *Ty) { } bool FixFunctionBitcasts::runOnModule(Module &M) { + Function *Main = nullptr; + CallInst *CallMain = nullptr; SmallVector<std::pair<Use *, Function *>, 0> Uses; SmallPtrSet<Constant *, 2> ConstantBCs; // Collect all the places that need wrappers. - for (Function &F : M) FindUses(&F, F, Uses, ConstantBCs); + for (Function &F : M) { + FindUses(&F, F, Uses, ConstantBCs); + + // If we have a "main" function, and its type isn't + // "int main(int argc, char *argv[])", create an artificial call with it + // bitcasted to that type so that we generate a wrapper for it, so that + // the C runtime can call it. 
+ if (!TemporaryWorkarounds && !F.isDeclaration() && F.getName() == "main") { + Main = &F; + LLVMContext &C = M.getContext(); + Type *MainArgTys[] = { + PointerType::get(Type::getInt8PtrTy(C), 0), + Type::getInt32Ty(C) + }; + FunctionType *MainTy = FunctionType::get(Type::getInt32Ty(C), MainArgTys, + /*isVarArg=*/false); + if (F.getFunctionType() != MainTy) { + Value *Args[] = { + UndefValue::get(MainArgTys[0]), + UndefValue::get(MainArgTys[1]) + }; + Value *Casted = ConstantExpr::getBitCast(Main, + PointerType::get(MainTy, 0)); + CallMain = CallInst::Create(Casted, Args, "call_main"); + Use *UseMain = &CallMain->getOperandUse(2); + Uses.push_back(std::make_pair(UseMain, &F)); + } + } + } DenseMap<std::pair<Function *, FunctionType *>, Function *> Wrappers; @@ -148,9 +197,9 @@ bool FixFunctionBitcasts::runOnModule(Module &M) { if (!Ty) continue; - // Wasm varargs are not ABI-compatible with non-varargs. Just ignore - // such casts for now. - if (Ty->isVarArg() || F->isVarArg()) + // Bitcasted vararg functions occur in Emscripten's implementation of + // EM_ASM, so suppress wrappers for them for now. + if (TemporaryWorkarounds && (Ty->isVarArg() || F->isVarArg())) continue; auto Pair = Wrappers.insert(std::make_pair(std::make_pair(F, Ty), nullptr)); @@ -167,5 +216,19 @@ bool FixFunctionBitcasts::runOnModule(Module &M) { U->set(Wrapper); } + // If we created a wrapper for main, rename the wrapper so that it's the + // one that gets called from startup. + if (CallMain) { + Main->setName("__original_main"); + Function *MainWrapper = + cast<Function>(CallMain->getCalledValue()->stripPointerCasts()); + MainWrapper->setName("main"); + MainWrapper->setLinkage(Main->getLinkage()); + MainWrapper->setVisibility(Main->getVisibility()); + Main->setLinkage(Function::PrivateLinkage); + Main->setVisibility(Function::DefaultVisibility); + delete CallMain; + } + return true; } diff --git a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp index 41f315c2825b..88daea7e3681 100644 --- a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp @@ -205,8 +205,7 @@ bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF, continue; unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1; - DEBUG(dbgs() << "MBB#" << MBB->getNumber() << " has index " << Index - << "\n"); + DEBUG(dbgs() << printMBBReference(*MBB) << " has index " << Index << "\n"); Pair.first->second = Index; for (auto Pred : MBB->predecessors()) diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp index a37d6136e44e..84246052f601 100644 --- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -94,7 +94,7 @@ bool WebAssemblyFrameLowering::needsSPWriteback( const MachineFunction &MF, const MachineFrameInfo &MFI) const { assert(needsSP(MF, MFI)); return MFI.getStackSize() > RedZoneSize || MFI.hasCalls() || - MF.getFunction()->hasFnAttribute(Attribute::NoRedZone); + MF.getFunction().hasFnAttribute(Attribute::NoRedZone); } static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF, diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h index bf326fce88fa..4cc7f5ae058a 100644 --- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h +++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h @@ 
-16,7 +16,7 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYFRAMELOWERING_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYFRAMELOWERING_H -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { class MachineFrameInfo; diff --git a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index 4f3ae57733e5..9f40d35689a5 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -48,9 +48,8 @@ public: } bool runOnMachineFunction(MachineFunction &MF) override { - ForCodeSize = - MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) || - MF.getFunction()->hasFnAttribute(Attribute::MinSize); + ForCodeSize = MF.getFunction().hasFnAttribute(Attribute::OptimizeForSize) || + MF.getFunction().hasFnAttribute(Attribute::MinSize); Subtarget = &MF.getSubtarget<WebAssemblySubtarget>(); return SelectionDAGISel::runOnMachineFunction(MF); } diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 814377003cbc..299009fa6674 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -19,6 +19,7 @@ #include "WebAssemblyTargetMachine.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -115,8 +116,12 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( // As a special case, these operators use the type to mean the type to // sign-extend from. - for (auto T : {MVT::i1, MVT::i8, MVT::i16, MVT::i32}) - setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + if (!Subtarget->hasAtomics()) { + // The Atomics feature includes signext intructions. + for (auto T : {MVT::i8, MVT::i16, MVT::i32}) + setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand); + } // Dynamic stack allocation: use the default expansion. setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); @@ -146,6 +151,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( // Trap lowers to wasm unreachable setOperationAction(ISD::TRAP, MVT::Other, Legal); + + setMaxAtomicSizeInBitsSupported(64); } FastISel *WebAssemblyTargetLowering::createFastISel( @@ -178,6 +185,160 @@ MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/, return Result; } +// Lower an fp-to-int conversion operator from the LLVM opcode, which has an +// undefined result on invalid/overflow, to the WebAssembly opcode, which +// traps on invalid/overflow. +static MachineBasicBlock * +LowerFPToInt( + MachineInstr &MI, + DebugLoc DL, + MachineBasicBlock *BB, + const TargetInstrInfo &TII, + bool IsUnsigned, + bool Int64, + bool Float64, + unsigned LoweredOpcode +) { + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + + unsigned OutReg = MI.getOperand(0).getReg(); + unsigned InReg = MI.getOperand(1).getReg(); + + unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32; + unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32; + unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32; + unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32; + unsigned IConst = Int64 ? 
WebAssembly::CONST_I64 : WebAssembly::CONST_I32; + unsigned Eqz = WebAssembly::EQZ_I32; + unsigned And = WebAssembly::AND_I32; + int64_t Limit = Int64 ? INT64_MIN : INT32_MIN; + int64_t Substitute = IsUnsigned ? 0 : Limit; + double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit; + auto &Context = BB->getParent()->getFunction().getContext(); + Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context); + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction *F = BB->getParent(); + MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVM_BB); + + MachineFunction::iterator It = ++BB->getIterator(); + F->insert(It, FalseMBB); + F->insert(It, TrueMBB); + F->insert(It, DoneMBB); + + // Transfer the remainder of BB and its successor edges to DoneMBB. + DoneMBB->splice(DoneMBB->begin(), BB, + std::next(MachineBasicBlock::iterator(MI)), + BB->end()); + DoneMBB->transferSuccessorsAndUpdatePHIs(BB); + + BB->addSuccessor(TrueMBB); + BB->addSuccessor(FalseMBB); + TrueMBB->addSuccessor(DoneMBB); + FalseMBB->addSuccessor(DoneMBB); + + unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg; + Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); + Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); + CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg)); + TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg)); + + MI.eraseFromParent(); + // For signed numbers, we can do a single comparison to determine whether + // fabs(x) is within range. + if (IsUnsigned) { + Tmp0 = InReg; + } else { + BuildMI(BB, DL, TII.get(Abs), Tmp0) + .addReg(InReg); + } + BuildMI(BB, DL, TII.get(FConst), Tmp1) + .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal))); + BuildMI(BB, DL, TII.get(LT), CmpReg) + .addReg(Tmp0) + .addReg(Tmp1); + + // For unsigned numbers, we have to do a separate comparison with zero. + if (IsUnsigned) { + Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); + unsigned SecondCmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + unsigned AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + BuildMI(BB, DL, TII.get(FConst), Tmp1) + .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0))); + BuildMI(BB, DL, TII.get(GE), SecondCmpReg) + .addReg(Tmp0) + .addReg(Tmp1); + BuildMI(BB, DL, TII.get(And), AndReg) + .addReg(CmpReg) + .addReg(SecondCmpReg); + CmpReg = AndReg; + } + + BuildMI(BB, DL, TII.get(Eqz), EqzReg) + .addReg(CmpReg); + + // Create the CFG diamond to select between doing the conversion or using + // the substitute value. 
+ BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)) + .addMBB(TrueMBB) + .addReg(EqzReg); + BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg) + .addReg(InReg); + BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)) + .addMBB(DoneMBB); + BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg) + .addImm(Substitute); + BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg) + .addReg(FalseReg) + .addMBB(FalseMBB) + .addReg(TrueReg) + .addMBB(TrueMBB); + + return DoneMBB; +} + +MachineBasicBlock * +WebAssemblyTargetLowering::EmitInstrWithCustomInserter( + MachineInstr &MI, + MachineBasicBlock *BB +) const { + const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + switch (MI.getOpcode()) { + default: llvm_unreachable("Unexpected instr type to insert"); + case WebAssembly::FP_TO_SINT_I32_F32: + return LowerFPToInt(MI, DL, BB, TII, false, false, false, + WebAssembly::I32_TRUNC_S_F32); + case WebAssembly::FP_TO_UINT_I32_F32: + return LowerFPToInt(MI, DL, BB, TII, true, false, false, + WebAssembly::I32_TRUNC_U_F32); + case WebAssembly::FP_TO_SINT_I64_F32: + return LowerFPToInt(MI, DL, BB, TII, false, true, false, + WebAssembly::I64_TRUNC_S_F32); + case WebAssembly::FP_TO_UINT_I64_F32: + return LowerFPToInt(MI, DL, BB, TII, true, true, false, + WebAssembly::I64_TRUNC_U_F32); + case WebAssembly::FP_TO_SINT_I32_F64: + return LowerFPToInt(MI, DL, BB, TII, false, false, true, + WebAssembly::I32_TRUNC_S_F64); + case WebAssembly::FP_TO_UINT_I32_F64: + return LowerFPToInt(MI, DL, BB, TII, true, false, true, + WebAssembly::I32_TRUNC_U_F64); + case WebAssembly::FP_TO_SINT_I64_F64: + return LowerFPToInt(MI, DL, BB, TII, false, true, true, + WebAssembly::I64_TRUNC_S_F64); + case WebAssembly::FP_TO_UINT_I64_F64: + return LowerFPToInt(MI, DL, BB, TII, true, true, true, + WebAssembly::I64_TRUNC_U_F64); + llvm_unreachable("Unexpected instruction to emit with custom inserter"); + } +} + const char *WebAssemblyTargetLowering::getTargetNodeName( unsigned Opcode) const { switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) { @@ -233,7 +394,8 @@ bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const { bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, - unsigned AS) const { + unsigned AS, + Instruction *I) const { // WebAssembly offsets are added as unsigned without wrapping. The // isLegalAddressingMode gives us no way to determine if wrapping could be // happening, so we approximate this by accepting only non-negative offsets. @@ -276,7 +438,7 @@ bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT, static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *msg) { MachineFunction &MF = DAG.getMachineFunction(); DAG.getContext()->diagnose( - DiagnosticInfoUnsupported(*MF.getFunction(), msg, DL.getDebugLoc())); + DiagnosticInfoUnsupported(MF.getFunction(), msg, DL.getDebugLoc())); } // Test whether the given calling convention is supported. @@ -313,7 +475,7 @@ SDValue WebAssemblyTargetLowering::LowerCall( // required, fail. Otherwise, just disable them. if ((CallConv == CallingConv::Fast && CLI.IsTailCall && MF.getTarget().Options.GuaranteedTailCallOpt) || - (CLI.CS && CLI.CS->isMustTailCall())) + (CLI.CS && CLI.CS.isMustTailCall())) fail(DL, DAG, "WebAssembly doesn't support tail call yet"); CLI.IsTailCall = false; @@ -535,7 +697,7 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments( // Record the number and types of results. 
SmallVector<MVT, 4> Params; SmallVector<MVT, 4> Results; - ComputeSignatureVTs(*MF.getFunction(), DAG.getTarget(), Params, Results); + ComputeSignatureVTs(MF.getFunction(), DAG.getTarget(), Params, Results); for (MVT VT : Results) MFI->addResult(VT); diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/lib/Target/WebAssembly/WebAssemblyISelLowering.h index 99d3d0d558f5..7bb8e71ab974 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -16,7 +16,7 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYISELLOWERING_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYISELLOWERING_H -#include "llvm/Target/TargetLowering.h" +#include "llvm/CodeGen/TargetLowering.h" namespace llvm { @@ -48,6 +48,9 @@ class WebAssemblyTargetLowering final : public TargetLowering { const TargetLibraryInfo *LibInfo) const override; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override; + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *MBB) const override; const char *getTargetNodeName(unsigned Opcode) const override; std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint( const TargetRegisterInfo *TRI, StringRef Constraint, @@ -55,7 +58,8 @@ class WebAssemblyTargetLowering final : public TargetLowering { bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, - unsigned AS) const override; + unsigned AS, + Instruction *I = nullptr) const override; bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, unsigned Align, bool *Fast) const override; bool isIntDivCheap(EVT VT, AttributeList Attr) const override; diff --git a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td index 64415658ed81..a49172df158f 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -12,19 +12,185 @@ /// //===----------------------------------------------------------------------===// -// TODO: Implement atomic instructions. - //===----------------------------------------------------------------------===// -// Atomic fences +// Atomic loads //===----------------------------------------------------------------------===// -// TODO: add atomic fences here... +let Defs = [ARGUMENTS] in { +def ATOMIC_LOAD_I32 : WebAssemblyLoad<I32, "i32.atomic.load", 0xfe10>; +def ATOMIC_LOAD_I64 : WebAssemblyLoad<I64, "i64.atomic.load", 0xfe11>; +} // Defs = [ARGUMENTS] -//===----------------------------------------------------------------------===// -// Atomic loads -//===----------------------------------------------------------------------===// +// Select loads with no constant offset. +let Predicates = [HasAtomics] in { +def : LoadPatNoOffset<i32, atomic_load_32, ATOMIC_LOAD_I32>; +def : LoadPatNoOffset<i64, atomic_load_64, ATOMIC_LOAD_I64>; + +// Select loads with a constant offset. 
+ +// Pattern with address + immediate offset +def : LoadPatImmOff<i32, atomic_load_32, regPlusImm, ATOMIC_LOAD_I32>; +def : LoadPatImmOff<i64, atomic_load_64, regPlusImm, ATOMIC_LOAD_I64>; +def : LoadPatImmOff<i32, atomic_load_32, or_is_add, ATOMIC_LOAD_I32>; +def : LoadPatImmOff<i64, atomic_load_64, or_is_add, ATOMIC_LOAD_I64>; + +def : LoadPatGlobalAddr<i32, atomic_load_32, ATOMIC_LOAD_I32>; +def : LoadPatGlobalAddr<i64, atomic_load_64, ATOMIC_LOAD_I64>; + +def : LoadPatExternalSym<i32, atomic_load_32, ATOMIC_LOAD_I32>; +def : LoadPatExternalSym<i64, atomic_load_64, ATOMIC_LOAD_I64>; + + +// Select loads with just a constant offset. +def : LoadPatOffsetOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>; +def : LoadPatOffsetOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>; + +def : LoadPatGlobalAddrOffOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>; +def : LoadPatGlobalAddrOffOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>; + +def : LoadPatExternSymOffOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>; +def : LoadPatExternSymOffOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>; + +} // Predicates = [HasAtomics] + +// Extending loads. Note that there are only zero-extending atomic loads, no +// sign-extending loads. +let Defs = [ARGUMENTS] in { +def ATOMIC_LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.atomic.load8_u", 0xfe12>; +def ATOMIC_LOAD16_U_I32 : WebAssemblyLoad<I32, "i32.atomic.load16_u", 0xfe13>; +def ATOMIC_LOAD8_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load8_u", 0xfe14>; +def ATOMIC_LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load16_u", 0xfe15>; +def ATOMIC_LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load32_u", 0xfe16>; +} // Defs = [ARGUMENTS] + +// Fragments for exending loads. These are different from regular loads because +// the SDNodes are derived from AtomicSDNode rather than LoadSDNode and +// therefore don't have the extension type field. So instead of matching that, +// we match the patterns that the type legalizer expands them to. -// TODO: add atomic loads here... +// We directly match zext patterns and select the zext atomic loads. +// i32 (zext (i8 (atomic_load_8))) gets legalized to +// i32 (and (i32 (atomic_load_8)), 255) +// These can be selected to a single zero-extending atomic load instruction. +def zext_aload_8 : PatFrag<(ops node:$addr), + (and (i32 (atomic_load_8 node:$addr)), 255)>; +def zext_aload_16 : PatFrag<(ops node:$addr), + (and (i32 (atomic_load_16 node:$addr)), 65535)>; +// Unlike regular loads, extension to i64 is handled differently than i32. +// i64 (zext (i8 (atomic_load_8))) gets legalized to +// i64 (and (i64 (anyext (i32 (atomic_load_8)))), 255) +def zext_aload_8_64 : + PatFrag<(ops node:$addr), + (and (i64 (anyext (i32 (atomic_load_8 node:$addr)))), 255)>; +def zext_aload_16_64 : + PatFrag<(ops node:$addr), + (and (i64 (anyext (i32 (atomic_load_16 node:$addr)))), 65535)>; +def zext_aload_32_64 : + PatFrag<(ops node:$addr), + (zext (i32 (atomic_load node:$addr)))>; + +// We don't have single sext atomic load instructions. So for sext loads, we +// match bare subword loads (for 32-bit results) and anyext loads (for 64-bit +// results) and select a zext load; the next instruction will be sext_inreg +// which is selected by itself. +def anyext_aload_8_64 : + PatFrag<(ops node:$addr), (anyext (i32 (atomic_load_8 node:$addr)))>; +def anyext_aload_16_64 : + PatFrag<(ops node:$addr), (anyext (i32 (atomic_load_16 node:$addr)))>; + +let Predicates = [HasAtomics] in { +// Select zero-extending loads with no constant offset. 
+def : LoadPatNoOffset<i32, zext_aload_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatNoOffset<i32, zext_aload_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatNoOffset<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatNoOffset<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>; +def : LoadPatNoOffset<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>; + +// Select sign-extending loads with no constant offset +def : LoadPatNoOffset<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatNoOffset<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatNoOffset<i64, anyext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatNoOffset<i64, anyext_aload_16_64, ATOMIC_LOAD16_U_I64>; +// 32->64 sext load gets selected as i32.atomic.load, i64.extend_s/i64 + + +// Zero-extending loads with constant offset +def : LoadPatImmOff<i32, zext_aload_8, regPlusImm, ATOMIC_LOAD8_U_I32>; +def : LoadPatImmOff<i32, zext_aload_16, regPlusImm, ATOMIC_LOAD16_U_I32>; +def : LoadPatImmOff<i32, zext_aload_8, or_is_add, ATOMIC_LOAD8_U_I32>; +def : LoadPatImmOff<i32, zext_aload_16, or_is_add, ATOMIC_LOAD16_U_I32>; +def : LoadPatImmOff<i64, zext_aload_8_64, regPlusImm, ATOMIC_LOAD8_U_I64>; +def : LoadPatImmOff<i64, zext_aload_16_64, regPlusImm, ATOMIC_LOAD16_U_I64>; +def : LoadPatImmOff<i64, zext_aload_32_64, regPlusImm, ATOMIC_LOAD32_U_I64>; +def : LoadPatImmOff<i64, zext_aload_8_64, or_is_add, ATOMIC_LOAD8_U_I64>; +def : LoadPatImmOff<i64, zext_aload_16_64, or_is_add, ATOMIC_LOAD16_U_I64>; +def : LoadPatImmOff<i64, zext_aload_32_64, or_is_add, ATOMIC_LOAD32_U_I64>; + +// Sign-extending loads with constant offset +def : LoadPatImmOff<i32, atomic_load_8, regPlusImm, ATOMIC_LOAD8_U_I32>; +def : LoadPatImmOff<i32, atomic_load_16, regPlusImm, ATOMIC_LOAD16_U_I32>; +def : LoadPatImmOff<i32, atomic_load_8, or_is_add, ATOMIC_LOAD8_U_I32>; +def : LoadPatImmOff<i32, atomic_load_16, or_is_add, ATOMIC_LOAD16_U_I32>; +def : LoadPatImmOff<i64, anyext_aload_8_64, regPlusImm, ATOMIC_LOAD8_U_I64>; +def : LoadPatImmOff<i64, anyext_aload_16_64, regPlusImm, ATOMIC_LOAD16_U_I64>; +def : LoadPatImmOff<i64, anyext_aload_8_64, or_is_add, ATOMIC_LOAD8_U_I64>; +def : LoadPatImmOff<i64, anyext_aload_16_64, or_is_add, ATOMIC_LOAD16_U_I64>; +// No 32->64 patterns, just use i32.atomic.load and i64.extend_s/i64 + +def : LoadPatGlobalAddr<i32, zext_aload_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatGlobalAddr<i32, zext_aload_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatGlobalAddr<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatGlobalAddr<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>; +def : LoadPatGlobalAddr<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>; +def : LoadPatGlobalAddr<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatGlobalAddr<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatGlobalAddr<i64, anyext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatGlobalAddr<i64, anyext_aload_16_64, ATOMIC_LOAD16_U_I64>; + +def : LoadPatExternalSym<i32, zext_aload_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatExternalSym<i32, zext_aload_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatExternalSym<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatExternalSym<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>; +def : LoadPatExternalSym<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>; +def : LoadPatExternalSym<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatExternalSym<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatExternalSym<i64, anyext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatExternalSym<i64, anyext_aload_16_64, ATOMIC_LOAD16_U_I64>; + + +// Extending loads with just a constant 
offset +def : LoadPatOffsetOnly<i32, zext_aload_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatOffsetOnly<i32, zext_aload_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatOffsetOnly<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatOffsetOnly<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>; +def : LoadPatOffsetOnly<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>; +def : LoadPatOffsetOnly<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatOffsetOnly<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatOffsetOnly<i64, anyext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatOffsetOnly<i64, anyext_aload_16_64, ATOMIC_LOAD16_U_I64>; + +def : LoadPatGlobalAddrOffOnly<i32, zext_aload_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatGlobalAddrOffOnly<i32, zext_aload_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatGlobalAddrOffOnly<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatGlobalAddrOffOnly<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>; +def : LoadPatGlobalAddrOffOnly<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>; +def : LoadPatGlobalAddrOffOnly<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatGlobalAddrOffOnly<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatGlobalAddrOffOnly<i64, anyext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatGlobalAddrOffOnly<i64, anyext_aload_16_64, ATOMIC_LOAD16_U_I64>; + +def : LoadPatExternSymOffOnly<i32, zext_aload_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatExternSymOffOnly<i32, zext_aload_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatExternSymOffOnly<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatExternSymOffOnly<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>; +def : LoadPatExternSymOffOnly<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>; +def : LoadPatExternSymOffOnly<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatExternSymOffOnly<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatExternSymOffOnly<i64, anyext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatExternSymOffOnly<i64, anyext_aload_16_64, ATOMIC_LOAD16_U_I64>; + + +} // Predicates = [HasAtomics] //===----------------------------------------------------------------------===// // Atomic stores @@ -45,3 +211,4 @@ // Store-release-exclusives. // And clear exclusive. 
+ diff --git a/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/lib/Target/WebAssembly/WebAssemblyInstrConv.td index 29483ba663d5..426c2c802172 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrConv.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrConv.td @@ -26,6 +26,24 @@ def I64_EXTEND_U_I32 : I<(outs I64:$dst), (ins I32:$src), [(set I64:$dst, (zext I32:$src))], "i64.extend_u/i32\t$dst, $src", 0xad>; +let Predicates = [HasAtomics] in { +def I32_EXTEND8_S_I32 : I<(outs I32:$dst), (ins I32:$src), + [(set I32:$dst, (sext_inreg I32:$src, i8))], + "i32.extend8_s\t$dst, $src", 0xc0>; +def I32_EXTEND16_S_I32 : I<(outs I32:$dst), (ins I32:$src), + [(set I32:$dst, (sext_inreg I32:$src, i16))], + "i32.extend16_s\t$dst, $src", 0xc1>; +def I64_EXTEND8_S_I64 : I<(outs I64:$dst), (ins I64:$src), + [(set I64:$dst, (sext_inreg I64:$src, i8))], + "i64.extend8_s\t$dst, $src", 0xc2>; +def I64_EXTEND16_S_I64 : I<(outs I64:$dst), (ins I64:$src), + [(set I64:$dst, (sext_inreg I64:$src, i16))], + "i64.extend16_s\t$dst, $src", 0xc3>; +def I64_EXTEND32_S_I64 : I<(outs I64:$dst), (ins I64:$src), + [(set I64:$dst, (sext_inreg I64:$src, i32))], + "i64.extend32_s\t$dst, $src", 0xc4>; +} // Predicates = [HasAtomics] + } // defs = [ARGUMENTS] // Expand a "don't care" extend into zero-extend (chosen over sign-extend @@ -35,32 +53,88 @@ def : Pat<(i64 (anyext I32:$src)), (I64_EXTEND_U_I32 I32:$src)>; let Defs = [ARGUMENTS] in { +// Conversion from floating point to integer instructions which don't trap on +// overflow or invalid. +def I32_TRUNC_S_SAT_F32 : I<(outs I32:$dst), (ins F32:$src), + [(set I32:$dst, (fp_to_sint F32:$src))], + "i32.trunc_s:sat/f32\t$dst, $src", 0xfc00>, + Requires<[HasNontrappingFPToInt]>; +def I32_TRUNC_U_SAT_F32 : I<(outs I32:$dst), (ins F32:$src), + [(set I32:$dst, (fp_to_uint F32:$src))], + "i32.trunc_u:sat/f32\t$dst, $src", 0xfc01>, + Requires<[HasNontrappingFPToInt]>; +def I64_TRUNC_S_SAT_F32 : I<(outs I64:$dst), (ins F32:$src), + [(set I64:$dst, (fp_to_sint F32:$src))], + "i64.trunc_s:sat/f32\t$dst, $src", 0xfc04>, + Requires<[HasNontrappingFPToInt]>; +def I64_TRUNC_U_SAT_F32 : I<(outs I64:$dst), (ins F32:$src), + [(set I64:$dst, (fp_to_uint F32:$src))], + "i64.trunc_u:sat/f32\t$dst, $src", 0xfc05>, + Requires<[HasNontrappingFPToInt]>; +def I32_TRUNC_S_SAT_F64 : I<(outs I32:$dst), (ins F64:$src), + [(set I32:$dst, (fp_to_sint F64:$src))], + "i32.trunc_s:sat/f64\t$dst, $src", 0xfc02>, + Requires<[HasNontrappingFPToInt]>; +def I32_TRUNC_U_SAT_F64 : I<(outs I32:$dst), (ins F64:$src), + [(set I32:$dst, (fp_to_uint F64:$src))], + "i32.trunc_u:sat/f64\t$dst, $src", 0xfc03>, + Requires<[HasNontrappingFPToInt]>; +def I64_TRUNC_S_SAT_F64 : I<(outs I64:$dst), (ins F64:$src), + [(set I64:$dst, (fp_to_sint F64:$src))], + "i64.trunc_s:sat/f64\t$dst, $src", 0xfc06>, + Requires<[HasNontrappingFPToInt]>; +def I64_TRUNC_U_SAT_F64 : I<(outs I64:$dst), (ins F64:$src), + [(set I64:$dst, (fp_to_uint F64:$src))], + "i64.trunc_u:sat/f64\t$dst, $src", 0xfc07>, + Requires<[HasNontrappingFPToInt]>; + +// Conversion from floating point to integer pseudo-instructions which don't +// trap on overflow or invalid. 
+let usesCustomInserter = 1, isCodeGenOnly = 1 in { +def FP_TO_SINT_I32_F32 : I<(outs I32:$dst), (ins F32:$src), + [(set I32:$dst, (fp_to_sint F32:$src))], "", 0>, + Requires<[NotHasNontrappingFPToInt]>; +def FP_TO_UINT_I32_F32 : I<(outs I32:$dst), (ins F32:$src), + [(set I32:$dst, (fp_to_uint F32:$src))], "", 0>, + Requires<[NotHasNontrappingFPToInt]>; +def FP_TO_SINT_I64_F32 : I<(outs I64:$dst), (ins F32:$src), + [(set I64:$dst, (fp_to_sint F32:$src))], "", 0>, + Requires<[NotHasNontrappingFPToInt]>; +def FP_TO_UINT_I64_F32 : I<(outs I64:$dst), (ins F32:$src), + [(set I64:$dst, (fp_to_uint F32:$src))], "", 0>, + Requires<[NotHasNontrappingFPToInt]>; +def FP_TO_SINT_I32_F64 : I<(outs I32:$dst), (ins F64:$src), + [(set I32:$dst, (fp_to_sint F64:$src))], "", 0>, + Requires<[NotHasNontrappingFPToInt]>; +def FP_TO_UINT_I32_F64 : I<(outs I32:$dst), (ins F64:$src), + [(set I32:$dst, (fp_to_uint F64:$src))], "", 0>, + Requires<[NotHasNontrappingFPToInt]>; +def FP_TO_SINT_I64_F64 : I<(outs I64:$dst), (ins F64:$src), + [(set I64:$dst, (fp_to_sint F64:$src))], "", 0>, + Requires<[NotHasNontrappingFPToInt]>; +def FP_TO_UINT_I64_F64 : I<(outs I64:$dst), (ins F64:$src), + [(set I64:$dst, (fp_to_uint F64:$src))], "", 0>, + Requires<[NotHasNontrappingFPToInt]>; +} // usesCustomInserter, isCodeGenOnly = 1 + // Conversion from floating point to integer traps on overflow and invalid. let hasSideEffects = 1 in { def I32_TRUNC_S_F32 : I<(outs I32:$dst), (ins F32:$src), - [(set I32:$dst, (fp_to_sint F32:$src))], - "i32.trunc_s/f32\t$dst, $src", 0xa8>; + [], "i32.trunc_s/f32\t$dst, $src", 0xa8>; def I32_TRUNC_U_F32 : I<(outs I32:$dst), (ins F32:$src), - [(set I32:$dst, (fp_to_uint F32:$src))], - "i32.trunc_u/f32\t$dst, $src", 0xa9>; + [], "i32.trunc_u/f32\t$dst, $src", 0xa9>; def I64_TRUNC_S_F32 : I<(outs I64:$dst), (ins F32:$src), - [(set I64:$dst, (fp_to_sint F32:$src))], - "i64.trunc_s/f32\t$dst, $src", 0xae>; + [], "i64.trunc_s/f32\t$dst, $src", 0xae>; def I64_TRUNC_U_F32 : I<(outs I64:$dst), (ins F32:$src), - [(set I64:$dst, (fp_to_uint F32:$src))], - "i64.trunc_u/f32\t$dst, $src", 0xaf>; + [], "i64.trunc_u/f32\t$dst, $src", 0xaf>; def I32_TRUNC_S_F64 : I<(outs I32:$dst), (ins F64:$src), - [(set I32:$dst, (fp_to_sint F64:$src))], - "i32.trunc_s/f64\t$dst, $src", 0xaa>; + [], "i32.trunc_s/f64\t$dst, $src", 0xaa>; def I32_TRUNC_U_F64 : I<(outs I32:$dst), (ins F64:$src), - [(set I32:$dst, (fp_to_uint F64:$src))], - "i32.trunc_u/f64\t$dst, $src", 0xab>; + [], "i32.trunc_u/f64\t$dst, $src", 0xab>; def I64_TRUNC_S_F64 : I<(outs I64:$dst), (ins F64:$src), - [(set I64:$dst, (fp_to_sint F64:$src))], - "i64.trunc_s/f64\t$dst, $src", 0xb0>; + [], "i64.trunc_s/f64\t$dst, $src", 0xb0>; def I64_TRUNC_U_F64 : I<(outs I64:$dst), (ins F64:$src), - [(set I64:$dst, (fp_to_uint F64:$src))], - "i64.trunc_u/f64\t$dst, $src", 0xb1>; + [], "i64.trunc_u/f64\t$dst, $src", 0xb1>; } // hasSideEffects = 1 def F32_CONVERT_S_I32 : I<(outs F32:$dst), (ins I32:$src), diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td index 5b2498402571..4f41fcc232e9 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td @@ -32,6 +32,10 @@ class SIMD_I<dag oops, dag iops, list<dag> pattern, string asmstr = "", bits<32> inst = -1> : I<oops, iops, pattern, asmstr, inst>, Requires<[HasSIMD128]>; +class ATOMIC_I<dag oops, dag iops, list<dag> pattern, + string asmstr = "", bits<32> inst = -1> + : I<oops, iops, pattern, asmstr, inst>, 
Requires<[HasAtomics]>; + // Unary and binary instructions, for the local types that WebAssembly supports. multiclass UnaryInt<SDNode node, string name, bits<32> i32Inst, bits<32> i64Inst> { def _I32 : I<(outs I32:$dst), (ins I32:$src), diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h index df6c937a364b..eb74106336ed 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -17,7 +17,7 @@ #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYINSTRINFO_H #include "WebAssemblyRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "WebAssemblyGenInstrInfo.inc" diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index fa2146f7db84..f8d311ac3b00 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -20,6 +20,16 @@ def HasAddr32 : Predicate<"!Subtarget->hasAddr64()">; def HasAddr64 : Predicate<"Subtarget->hasAddr64()">; def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">, AssemblerPredicate<"FeatureSIMD128", "simd128">; +def HasAtomics : Predicate<"Subtarget->hasAtomics()">, + AssemblerPredicate<"FeatureAtomics", "atomics">; +def HasNontrappingFPToInt : + Predicate<"Subtarget->hasNontrappingFPToInt()">, + AssemblerPredicate<"FeatureNontrappingFPToInt", + "nontrapping-fptoint">; +def NotHasNontrappingFPToInt : + Predicate<"!Subtarget->hasNontrappingFPToInt()">, + AssemblerPredicate<"!FeatureNontrappingFPToInt", + "nontrapping-fptoint">; //===----------------------------------------------------------------------===// // WebAssembly-specific DAG Node Types. diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index 365b327190ec..9d58895ca5a6 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -55,395 +55,251 @@ def regPlusGA : PatFrag<(ops node:$addr, node:$off), let Defs = [ARGUMENTS] in { +// Defines atomic and non-atomic loads, regular and extending. +class WebAssemblyLoad<WebAssemblyRegClass rc, string Name, int Opcode> : + I<(outs rc:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + [], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"), Opcode>; + // Basic load. // FIXME: When we can break syntax compatibility, reorder the fields in the // asmstrings to match the binary encoding. -def LOAD_I32 : I<(outs I32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i32.load\t$dst, ${off}(${addr})${p2align}", 0x28>; -def LOAD_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load\t$dst, ${off}(${addr})${p2align}", 0x29>; -def LOAD_F32 : I<(outs F32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "f32.load\t$dst, ${off}(${addr})${p2align}", 0x2a>; -def LOAD_F64 : I<(outs F64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "f64.load\t$dst, ${off}(${addr})${p2align}", 0x2b>; +def LOAD_I32 : WebAssemblyLoad<I32, "i32.load", 0x28>; +def LOAD_I64 : WebAssemblyLoad<I64, "i64.load", 0x29>; +def LOAD_F32 : WebAssemblyLoad<F32, "f32.load", 0x2a>; +def LOAD_F64 : WebAssemblyLoad<F64, "f64.load", 0x2b>; } // Defs = [ARGUMENTS] // Select loads with no constant offset. 
-def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, 0, $addr)>; -def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, 0, $addr)>; -def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, 0, $addr)>; -def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, 0, $addr)>; +class LoadPatNoOffset<ValueType ty, PatFrag node, I inst> : + Pat<(ty (node I32:$addr)), (inst 0, 0, $addr)>; + +def : LoadPatNoOffset<i32, load, LOAD_I32>; +def : LoadPatNoOffset<i64, load, LOAD_I64>; +def : LoadPatNoOffset<f32, load, LOAD_F32>; +def : LoadPatNoOffset<f64, load, LOAD_F64>; + // Select loads with a constant offset. -def : Pat<(i32 (load (regPlusImm I32:$addr, imm:$off))), - (LOAD_I32 0, imm:$off, $addr)>; -def : Pat<(i64 (load (regPlusImm I32:$addr, imm:$off))), - (LOAD_I64 0, imm:$off, $addr)>; -def : Pat<(f32 (load (regPlusImm I32:$addr, imm:$off))), - (LOAD_F32 0, imm:$off, $addr)>; -def : Pat<(f64 (load (regPlusImm I32:$addr, imm:$off))), - (LOAD_F64 0, imm:$off, $addr)>; -def : Pat<(i32 (load (or_is_add I32:$addr, imm:$off))), - (LOAD_I32 0, imm:$off, $addr)>; -def : Pat<(i64 (load (or_is_add I32:$addr, imm:$off))), - (LOAD_I64 0, imm:$off, $addr)>; -def : Pat<(f32 (load (or_is_add I32:$addr, imm:$off))), - (LOAD_F32 0, imm:$off, $addr)>; -def : Pat<(f64 (load (or_is_add I32:$addr, imm:$off))), - (LOAD_F64 0, imm:$off, $addr)>; -def : Pat<(i32 (load (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD_I32 0, tglobaladdr:$off, $addr)>; -def : Pat<(i64 (load (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD_I64 0, tglobaladdr:$off, $addr)>; -def : Pat<(f32 (load (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD_F32 0, tglobaladdr:$off, $addr)>; -def : Pat<(f64 (load (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD_F64 0, tglobaladdr:$off, $addr)>; -def : Pat<(i32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD_I32 0, texternalsym:$off, $addr)>; -def : Pat<(i64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD_I64 0, texternalsym:$off, $addr)>; -def : Pat<(f32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD_F32 0, texternalsym:$off, $addr)>; -def : Pat<(f64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD_F64 0, texternalsym:$off, $addr)>; + +// Pattern with address + immediate offset +class LoadPatImmOff<ValueType ty, PatFrag loadkind, PatFrag operand, I inst> : + Pat<(ty (loadkind (operand I32:$addr, imm:$off))), + (inst 0, imm:$off, $addr)>; + +def : LoadPatImmOff<i32, load, regPlusImm, LOAD_I32>; +def : LoadPatImmOff<i64, load, regPlusImm, LOAD_I64>; +def : LoadPatImmOff<f32, load, regPlusImm, LOAD_F32>; +def : LoadPatImmOff<f64, load, regPlusImm, LOAD_F64>; +def : LoadPatImmOff<i32, load, or_is_add, LOAD_I32>; +def : LoadPatImmOff<i64, load, or_is_add, LOAD_I64>; +def : LoadPatImmOff<f32, load, or_is_add, LOAD_F32>; +def : LoadPatImmOff<f64, load, or_is_add, LOAD_F64>; + +class LoadPatGlobalAddr<ValueType ty, PatFrag loadkind, I inst> : + Pat<(ty (loadkind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), + (inst 0, tglobaladdr:$off, $addr)>; + +def : LoadPatGlobalAddr<i32, load, LOAD_I32>; +def : LoadPatGlobalAddr<i64, load, LOAD_I64>; +def : LoadPatGlobalAddr<f32, load, LOAD_F32>; +def : LoadPatGlobalAddr<f64, load, LOAD_F64>; + +class LoadPatExternalSym<ValueType ty, PatFrag loadkind, I inst> : + Pat<(ty (loadkind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), + (inst 0, texternalsym:$off, $addr)>; +def : 
LoadPatExternalSym<i32, load, LOAD_I32>; +def : LoadPatExternalSym<i64, load, LOAD_I64>; +def : LoadPatExternalSym<f32, load, LOAD_F32>; +def : LoadPatExternalSym<f64, load, LOAD_F64>; + // Select loads with just a constant offset. -def : Pat<(i32 (load imm:$off)), (LOAD_I32 0, imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (load imm:$off)), (LOAD_I64 0, imm:$off, (CONST_I32 0))>; -def : Pat<(f32 (load imm:$off)), (LOAD_F32 0, imm:$off, (CONST_I32 0))>; -def : Pat<(f64 (load imm:$off)), (LOAD_F64 0, imm:$off, (CONST_I32 0))>; -def : Pat<(i32 (load (WebAssemblywrapper tglobaladdr:$off))), - (LOAD_I32 0, tglobaladdr:$off, (CONST_I32 0))>; -def : Pat<(i64 (load (WebAssemblywrapper tglobaladdr:$off))), - (LOAD_I64 0, tglobaladdr:$off, (CONST_I32 0))>; -def : Pat<(f32 (load (WebAssemblywrapper tglobaladdr:$off))), - (LOAD_F32 0, tglobaladdr:$off, (CONST_I32 0))>; -def : Pat<(f64 (load (WebAssemblywrapper tglobaladdr:$off))), - (LOAD_F64 0, tglobaladdr:$off, (CONST_I32 0))>; -def : Pat<(i32 (load (WebAssemblywrapper texternalsym:$off))), - (LOAD_I32 0, texternalsym:$off, (CONST_I32 0))>; -def : Pat<(i64 (load (WebAssemblywrapper texternalsym:$off))), - (LOAD_I64 0, texternalsym:$off, (CONST_I32 0))>; -def : Pat<(f32 (load (WebAssemblywrapper texternalsym:$off))), - (LOAD_F32 0, texternalsym:$off, (CONST_I32 0))>; -def : Pat<(f64 (load (WebAssemblywrapper texternalsym:$off))), - (LOAD_F64 0, texternalsym:$off, (CONST_I32 0))>; +class LoadPatOffsetOnly<ValueType ty, PatFrag loadkind, I inst> : + Pat<(ty (loadkind imm:$off)), (inst 0, imm:$off, (CONST_I32 0))>; + +def : LoadPatOffsetOnly<i32, load, LOAD_I32>; +def : LoadPatOffsetOnly<i64, load, LOAD_I64>; +def : LoadPatOffsetOnly<f32, load, LOAD_F32>; +def : LoadPatOffsetOnly<f64, load, LOAD_F64>; + +class LoadPatGlobalAddrOffOnly<ValueType ty, PatFrag loadkind, I inst> : + Pat<(ty (loadkind (WebAssemblywrapper tglobaladdr:$off))), + (inst 0, tglobaladdr:$off, (CONST_I32 0))>; + +def : LoadPatGlobalAddrOffOnly<i32, load, LOAD_I32>; +def : LoadPatGlobalAddrOffOnly<i64, load, LOAD_I64>; +def : LoadPatGlobalAddrOffOnly<f32, load, LOAD_F32>; +def : LoadPatGlobalAddrOffOnly<f64, load, LOAD_F64>; + +class LoadPatExternSymOffOnly<ValueType ty, PatFrag loadkind, I inst> : + Pat<(ty (loadkind (WebAssemblywrapper texternalsym:$off))), + (inst 0, texternalsym:$off, (CONST_I32 0))>; +def : LoadPatExternSymOffOnly<i32, load, LOAD_I32>; +def : LoadPatExternSymOffOnly<i64, load, LOAD_I64>; +def : LoadPatExternSymOffOnly<f32, load, LOAD_F32>; +def : LoadPatExternSymOffOnly<f64, load, LOAD_F64>; let Defs = [ARGUMENTS] in { // Extending load. 
-def LOAD8_S_I32 : I<(outs I32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i32.load8_s\t$dst, ${off}(${addr})${p2align}", 0x2c>; -def LOAD8_U_I32 : I<(outs I32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i32.load8_u\t$dst, ${off}(${addr})${p2align}", 0x2d>; -def LOAD16_S_I32 : I<(outs I32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i32.load16_s\t$dst, ${off}(${addr})${p2align}", 0x2e>; -def LOAD16_U_I32 : I<(outs I32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i32.load16_u\t$dst, ${off}(${addr})${p2align}", 0x2f>; -def LOAD8_S_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load8_s\t$dst, ${off}(${addr})${p2align}", 0x30>; -def LOAD8_U_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load8_u\t$dst, ${off}(${addr})${p2align}", 0x31>; -def LOAD16_S_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load16_s\t$dst, ${off}(${addr})${p2align}", 0x32>; -def LOAD16_U_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load16_u\t$dst, ${off}(${addr})${p2align}", 0x33>; -def LOAD32_S_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load32_s\t$dst, ${off}(${addr})${p2align}", 0x34>; -def LOAD32_U_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load32_u\t$dst, ${off}(${addr})${p2align}", 0x35>; +def LOAD8_S_I32 : WebAssemblyLoad<I32, "i32.load8_s", 0x2c>; +def LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.load8_u", 0x2d>; +def LOAD16_S_I32 : WebAssemblyLoad<I32, "i32.load16_s", 0x2e>; +def LOAD16_U_I32 : WebAssemblyLoad<I32, "i32.load16_u", 0x2f>; +def LOAD8_S_I64 : WebAssemblyLoad<I64, "i64.load8_s", 0x30>; +def LOAD8_U_I64 : WebAssemblyLoad<I64, "i64.load8_u", 0x31>; +def LOAD16_S_I64 : WebAssemblyLoad<I64, "i64.load16_s", 0x32>; +def LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.load16_u", 0x33>; +def LOAD32_S_I64 : WebAssemblyLoad<I64, "i64.load32_s", 0x34>; +def LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.load32_u", 0x35>; } // Defs = [ARGUMENTS] // Select extending loads with no constant offset. 
-def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, 0, $addr)>; -def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, 0, $addr)>; -def : Pat<(i32 (sextloadi16 I32:$addr)), (LOAD16_S_I32 0, 0, $addr)>; -def : Pat<(i32 (zextloadi16 I32:$addr)), (LOAD16_U_I32 0, 0, $addr)>; -def : Pat<(i64 (sextloadi8 I32:$addr)), (LOAD8_S_I64 0, 0, $addr)>; -def : Pat<(i64 (zextloadi8 I32:$addr)), (LOAD8_U_I64 0, 0, $addr)>; -def : Pat<(i64 (sextloadi16 I32:$addr)), (LOAD16_S_I64 0, 0, $addr)>; -def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, 0, $addr)>; -def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, 0, $addr)>; -def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, 0, $addr)>; +def : LoadPatNoOffset<i32, sextloadi8, LOAD8_S_I32>; +def : LoadPatNoOffset<i32, zextloadi8, LOAD8_U_I32>; +def : LoadPatNoOffset<i32, sextloadi16, LOAD16_S_I32>; +def : LoadPatNoOffset<i32, zextloadi16, LOAD16_U_I32>; +def : LoadPatNoOffset<i64, sextloadi8, LOAD8_S_I64>; +def : LoadPatNoOffset<i64, zextloadi8, LOAD8_U_I64>; +def : LoadPatNoOffset<i64, sextloadi16, LOAD16_S_I64>; +def : LoadPatNoOffset<i64, zextloadi16, LOAD16_U_I64>; +def : LoadPatNoOffset<i64, sextloadi32, LOAD32_S_I64>; +def : LoadPatNoOffset<i64, zextloadi32, LOAD32_U_I64>; // Select extending loads with a constant offset. -def : Pat<(i32 (sextloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_S_I32 0, imm:$off, $addr)>; -def : Pat<(i32 (zextloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_U_I32 0, imm:$off, $addr)>; -def : Pat<(i32 (sextloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_S_I32 0, imm:$off, $addr)>; -def : Pat<(i32 (zextloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_U_I32 0, imm:$off, $addr)>; -def : Pat<(i64 (sextloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_S_I64 0, imm:$off, $addr)>; -def : Pat<(i64 (zextloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_U_I64 0, imm:$off, $addr)>; -def : Pat<(i64 (sextloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_S_I64 0, imm:$off, $addr)>; -def : Pat<(i64 (zextloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_U_I64 0, imm:$off, $addr)>; -def : Pat<(i64 (sextloadi32 (regPlusImm I32:$addr, imm:$off))), - (LOAD32_S_I64 0, imm:$off, $addr)>; -def : Pat<(i64 (zextloadi32 (regPlusImm I32:$addr, imm:$off))), - (LOAD32_U_I64 0, imm:$off, $addr)>; -def : Pat<(i32 (sextloadi8 (or_is_add I32:$addr, imm:$off))), - (LOAD8_S_I32 0, imm:$off, $addr)>; -def : Pat<(i32 (zextloadi8 (or_is_add I32:$addr, imm:$off))), - (LOAD8_U_I32 0, imm:$off, $addr)>; -def : Pat<(i32 (sextloadi16 (or_is_add I32:$addr, imm:$off))), - (LOAD16_S_I32 0, imm:$off, $addr)>; -def : Pat<(i32 (zextloadi16 (or_is_add I32:$addr, imm:$off))), - (LOAD16_U_I32 0, imm:$off, $addr)>; -def : Pat<(i64 (sextloadi8 (or_is_add I32:$addr, imm:$off))), - (LOAD8_S_I64 0, imm:$off, $addr)>; -def : Pat<(i64 (zextloadi8 (or_is_add I32:$addr, imm:$off))), - (LOAD8_U_I64 0, imm:$off, $addr)>; -def : Pat<(i64 (sextloadi16 (or_is_add I32:$addr, imm:$off))), - (LOAD16_S_I64 0, imm:$off, $addr)>; -def : Pat<(i64 (zextloadi16 (or_is_add I32:$addr, imm:$off))), - (LOAD16_U_I64 0, imm:$off, $addr)>; -def : Pat<(i64 (sextloadi32 (or_is_add I32:$addr, imm:$off))), - (LOAD32_S_I64 0, imm:$off, $addr)>; -def : Pat<(i64 (zextloadi32 (or_is_add I32:$addr, imm:$off))), - (LOAD32_U_I64 0, imm:$off, $addr)>; -def : Pat<(i32 (sextloadi8 (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_S_I32 0, tglobaladdr:$off, $addr)>; -def : Pat<(i32 (zextloadi8 (regPlusGA I32:$addr, - (WebAssemblywrapper 
tglobaladdr:$off)))), - (LOAD8_U_I32 0, tglobaladdr:$off, $addr)>; -def : Pat<(i32 (sextloadi16 (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_S_I32 0, tglobaladdr:$off, $addr)>; -def : Pat<(i32 (zextloadi16 (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_U_I32 0, tglobaladdr:$off, $addr)>; -def : Pat<(i64 (sextloadi8 (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_S_I64 0, tglobaladdr:$off, $addr)>; -def : Pat<(i64 (zextloadi8 (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_U_I64 0, tglobaladdr:$off, $addr)>; -def : Pat<(i64 (sextloadi16 (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_S_I64 0, tglobaladdr:$off, $addr)>; -def : Pat<(i64 (zextloadi16 (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_U_I64 0, tglobaladdr:$off, $addr)>; -def : Pat<(i64 (sextloadi32 (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD32_S_I64 0, tglobaladdr:$off, $addr)>; -def : Pat<(i64 (zextloadi32 (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD32_U_I64 0, tglobaladdr:$off, $addr)>; -def : Pat<(i32 (sextloadi8 (add I32:$addr, - (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_S_I32 0, texternalsym:$off, $addr)>; -def : Pat<(i32 (zextloadi8 (add I32:$addr, - (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_U_I32 0, texternalsym:$off, $addr)>; -def : Pat<(i32 (sextloadi16 (add I32:$addr, - (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_S_I32 0, texternalsym:$off, $addr)>; -def : Pat<(i32 (zextloadi16 (add I32:$addr, - (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_U_I32 0, texternalsym:$off, $addr)>; -def : Pat<(i64 (sextloadi8 (add I32:$addr, - (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_S_I64 0, texternalsym:$off, $addr)>; -def : Pat<(i64 (zextloadi8 (add I32:$addr, - (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_U_I64 0, texternalsym:$off, $addr)>; -def : Pat<(i64 (sextloadi16 (add I32:$addr, - (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_S_I64 0, texternalsym:$off, $addr)>; -def : Pat<(i64 (zextloadi16 (add I32:$addr, - (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_U_I64 0, texternalsym:$off, $addr)>; -def : Pat<(i64 (sextloadi32 (add I32:$addr, - (WebAssemblywrapper texternalsym:$off)))), - (LOAD32_S_I64 0, texternalsym:$off, $addr)>; -def : Pat<(i64 (zextloadi32 (add I32:$addr, - (WebAssemblywrapper texternalsym:$off)))), - (LOAD32_U_I64 0, texternalsym:$off, $addr)>; +def : LoadPatImmOff<i32, sextloadi8, regPlusImm, LOAD8_S_I32>; +def : LoadPatImmOff<i32, zextloadi8, regPlusImm, LOAD8_U_I32>; +def : LoadPatImmOff<i32, sextloadi16, regPlusImm, LOAD16_S_I32>; +def : LoadPatImmOff<i32, zextloadi16, regPlusImm, LOAD16_U_I32>; +def : LoadPatImmOff<i64, sextloadi8, regPlusImm, LOAD8_S_I64>; +def : LoadPatImmOff<i64, zextloadi8, regPlusImm, LOAD8_U_I64>; +def : LoadPatImmOff<i64, sextloadi16, regPlusImm, LOAD16_S_I64>; +def : LoadPatImmOff<i64, zextloadi16, regPlusImm, LOAD16_U_I64>; +def : LoadPatImmOff<i64, sextloadi32, regPlusImm, LOAD32_S_I64>; +def : LoadPatImmOff<i64, zextloadi32, regPlusImm, LOAD32_U_I64>; + +def : LoadPatImmOff<i32, sextloadi8, or_is_add, LOAD8_S_I32>; +def : LoadPatImmOff<i32, zextloadi8, or_is_add, LOAD8_U_I32>; +def : LoadPatImmOff<i32, sextloadi16, or_is_add, LOAD16_S_I32>; +def : LoadPatImmOff<i32, zextloadi16, or_is_add, LOAD16_U_I32>; +def : LoadPatImmOff<i64, sextloadi8, or_is_add, LOAD8_S_I64>; +def : LoadPatImmOff<i64, 
zextloadi8, or_is_add, LOAD8_U_I64>; +def : LoadPatImmOff<i64, sextloadi16, or_is_add, LOAD16_S_I64>; +def : LoadPatImmOff<i64, zextloadi16, or_is_add, LOAD16_U_I64>; +def : LoadPatImmOff<i64, sextloadi32, or_is_add, LOAD32_S_I64>; +def : LoadPatImmOff<i64, zextloadi32, or_is_add, LOAD32_U_I64>; + +def : LoadPatGlobalAddr<i32, sextloadi8, LOAD8_S_I32>; +def : LoadPatGlobalAddr<i32, zextloadi8, LOAD8_U_I32>; +def : LoadPatGlobalAddr<i32, sextloadi16, LOAD16_S_I32>; +def : LoadPatGlobalAddr<i32, zextloadi8, LOAD16_U_I32>; + +def : LoadPatGlobalAddr<i64, sextloadi8, LOAD8_S_I64>; +def : LoadPatGlobalAddr<i64, zextloadi8, LOAD8_U_I64>; +def : LoadPatGlobalAddr<i64, sextloadi16, LOAD16_S_I64>; +def : LoadPatGlobalAddr<i64, zextloadi16, LOAD16_U_I64>; +def : LoadPatGlobalAddr<i64, sextloadi32, LOAD32_S_I64>; +def : LoadPatGlobalAddr<i64, zextloadi32, LOAD32_U_I64>; + +def : LoadPatExternalSym<i32, sextloadi8, LOAD8_S_I32>; +def : LoadPatExternalSym<i32, zextloadi8, LOAD8_U_I32>; +def : LoadPatExternalSym<i32, sextloadi16, LOAD16_S_I32>; +def : LoadPatExternalSym<i32, zextloadi16, LOAD16_U_I32>; +def : LoadPatExternalSym<i64, sextloadi8, LOAD8_S_I64>; +def : LoadPatExternalSym<i64, zextloadi8, LOAD8_U_I64>; +def : LoadPatExternalSym<i64, sextloadi16, LOAD16_S_I64>; +def : LoadPatExternalSym<i64, zextloadi16, LOAD16_U_I64>; +def : LoadPatExternalSym<i64, sextloadi32, LOAD32_S_I64>; +def : LoadPatExternalSym<i64, zextloadi32, LOAD32_U_I64>; + // Select extending loads with just a constant offset. -def : Pat<(i32 (sextloadi8 imm:$off)), - (LOAD8_S_I32 0, imm:$off, (CONST_I32 0))>; -def : Pat<(i32 (zextloadi8 imm:$off)), - (LOAD8_U_I32 0, imm:$off, (CONST_I32 0))>; -def : Pat<(i32 (sextloadi16 imm:$off)), - (LOAD16_S_I32 0, imm:$off, (CONST_I32 0))>; -def : Pat<(i32 (zextloadi16 imm:$off)), - (LOAD16_U_I32 0, imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (sextloadi8 imm:$off)), - (LOAD8_S_I64 0, imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (zextloadi8 imm:$off)), - (LOAD8_U_I64 0, imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (sextloadi16 imm:$off)), - (LOAD16_S_I64 0, imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (zextloadi16 imm:$off)), - (LOAD16_U_I64 0, imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (sextloadi32 imm:$off)), - (LOAD32_S_I64 0, imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (zextloadi32 imm:$off)), - (LOAD32_U_I64 0, imm:$off, (CONST_I32 0))>; -def : Pat<(i32 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_S_I32 0, tglobaladdr:$off, (CONST_I32 0))>; -def : Pat<(i32 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_U_I32 0, tglobaladdr:$off, (CONST_I32 0))>; -def : Pat<(i32 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_S_I32 0, tglobaladdr:$off, (CONST_I32 0))>; -def : Pat<(i32 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_U_I32 0, tglobaladdr:$off, (CONST_I32 0))>; -def : Pat<(i64 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_S_I64 0, tglobaladdr:$off, (CONST_I32 0))>; -def : Pat<(i64 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>; -def : Pat<(i64 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_S_I64 0, tglobaladdr:$off, (CONST_I32 0))>; -def : Pat<(i64 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>; -def : Pat<(i64 (sextloadi32 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD32_S_I64 0, tglobaladdr:$off, (CONST_I32 0))>; -def : Pat<(i64 (zextloadi32 (WebAssemblywrapper tglobaladdr:$off))), - 
(LOAD32_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>; -def : Pat<(i32 (sextloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_S_I32 0, texternalsym:$off, (CONST_I32 0))>; -def : Pat<(i32 (zextloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_U_I32 0, texternalsym:$off, (CONST_I32 0))>; -def : Pat<(i32 (sextloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_S_I32 0, texternalsym:$off, (CONST_I32 0))>; -def : Pat<(i32 (zextloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_U_I32 0, texternalsym:$off, (CONST_I32 0))>; -def : Pat<(i64 (sextloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_S_I64 0, texternalsym:$off, (CONST_I32 0))>; -def : Pat<(i64 (zextloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_U_I64 0, texternalsym:$off, (CONST_I32 0))>; -def : Pat<(i64 (sextloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_S_I64 0, texternalsym:$off, (CONST_I32 0))>; -def : Pat<(i64 (zextloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_U_I64 0, texternalsym:$off, (CONST_I32 0))>; -def : Pat<(i64 (sextloadi32 (WebAssemblywrapper texternalsym:$off))), - (LOAD32_S_I64 0, texternalsym:$off, (CONST_I32 0))>; -def : Pat<(i64 (zextloadi32 (WebAssemblywrapper texternalsym:$off))), - (LOAD32_U_I64 0, texternalsym:$off, (CONST_I32 0))>; +def : LoadPatOffsetOnly<i32, sextloadi8, LOAD8_S_I32>; +def : LoadPatOffsetOnly<i32, zextloadi8, LOAD8_U_I32>; +def : LoadPatOffsetOnly<i32, sextloadi16, LOAD16_S_I32>; +def : LoadPatOffsetOnly<i32, zextloadi16, LOAD16_U_I32>; + +def : LoadPatOffsetOnly<i64, sextloadi8, LOAD8_S_I64>; +def : LoadPatOffsetOnly<i64, zextloadi8, LOAD8_U_I64>; +def : LoadPatOffsetOnly<i64, sextloadi16, LOAD16_S_I64>; +def : LoadPatOffsetOnly<i64, zextloadi16, LOAD16_U_I64>; +def : LoadPatOffsetOnly<i64, sextloadi32, LOAD32_S_I64>; +def : LoadPatOffsetOnly<i64, zextloadi32, LOAD32_U_I64>; + +def : LoadPatGlobalAddrOffOnly<i32, sextloadi8, LOAD8_S_I32>; +def : LoadPatGlobalAddrOffOnly<i32, zextloadi8, LOAD8_U_I32>; +def : LoadPatGlobalAddrOffOnly<i32, sextloadi16, LOAD16_S_I32>; +def : LoadPatGlobalAddrOffOnly<i32, zextloadi16, LOAD16_U_I32>; +def : LoadPatGlobalAddrOffOnly<i64, sextloadi8, LOAD8_S_I64>; +def : LoadPatGlobalAddrOffOnly<i64, zextloadi8, LOAD8_U_I64>; +def : LoadPatGlobalAddrOffOnly<i64, sextloadi16, LOAD16_S_I64>; +def : LoadPatGlobalAddrOffOnly<i64, zextloadi16, LOAD16_U_I64>; +def : LoadPatGlobalAddrOffOnly<i64, sextloadi32, LOAD32_S_I64>; +def : LoadPatGlobalAddrOffOnly<i64, zextloadi32, LOAD32_U_I64>; + +def : LoadPatExternSymOffOnly<i32, sextloadi8, LOAD8_S_I32>; +def : LoadPatExternSymOffOnly<i32, zextloadi8, LOAD8_U_I32>; +def : LoadPatExternSymOffOnly<i32, sextloadi16, LOAD16_S_I32>; +def : LoadPatExternSymOffOnly<i32, zextloadi16, LOAD16_U_I32>; +def : LoadPatExternSymOffOnly<i64, sextloadi8, LOAD8_S_I64>; +def : LoadPatExternSymOffOnly<i64, zextloadi8, LOAD8_U_I64>; +def : LoadPatExternSymOffOnly<i64, sextloadi16, LOAD16_S_I64>; +def : LoadPatExternSymOffOnly<i64, zextloadi16, LOAD16_U_I64>; +def : LoadPatExternSymOffOnly<i64, sextloadi32, LOAD32_S_I64>; +def : LoadPatExternSymOffOnly<i64, zextloadi32, LOAD32_U_I64>; // Resolve "don't care" extending loads to zero-extending loads. This is // somewhat arbitrary, but zero-extending is conceptually simpler. // Select "don't care" extending loads with no constant offset. 
-def : Pat<(i32 (extloadi8 I32:$addr)), (LOAD8_U_I32 0, 0, $addr)>; -def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, 0, $addr)>; -def : Pat<(i64 (extloadi8 I32:$addr)), (LOAD8_U_I64 0, 0, $addr)>; -def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, 0, $addr)>; -def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, 0, $addr)>; +def : LoadPatNoOffset<i32, extloadi8, LOAD8_U_I32>; +def : LoadPatNoOffset<i32, extloadi16, LOAD16_U_I32>; +def : LoadPatNoOffset<i64, extloadi8, LOAD8_U_I64>; +def : LoadPatNoOffset<i64, extloadi16, LOAD16_U_I64>; +def : LoadPatNoOffset<i64, extloadi32, LOAD32_U_I64>; // Select "don't care" extending loads with a constant offset. -def : Pat<(i32 (extloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_U_I32 0, imm:$off, $addr)>; -def : Pat<(i32 (extloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_U_I32 0, imm:$off, $addr)>; -def : Pat<(i64 (extloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_U_I64 0, imm:$off, $addr)>; -def : Pat<(i64 (extloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_U_I64 0, imm:$off, $addr)>; -def : Pat<(i64 (extloadi32 (regPlusImm I32:$addr, imm:$off))), - (LOAD32_U_I64 0, imm:$off, $addr)>; -def : Pat<(i32 (extloadi8 (or_is_add I32:$addr, imm:$off))), - (LOAD8_U_I32 0, imm:$off, $addr)>; -def : Pat<(i32 (extloadi16 (or_is_add I32:$addr, imm:$off))), - (LOAD16_U_I32 0, imm:$off, $addr)>; -def : Pat<(i64 (extloadi8 (or_is_add I32:$addr, imm:$off))), - (LOAD8_U_I64 0, imm:$off, $addr)>; -def : Pat<(i64 (extloadi16 (or_is_add I32:$addr, imm:$off))), - (LOAD16_U_I64 0, imm:$off, $addr)>; -def : Pat<(i64 (extloadi32 (or_is_add I32:$addr, imm:$off))), - (LOAD32_U_I64 0, imm:$off, $addr)>; -def : Pat<(i32 (extloadi8 (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_U_I32 0, tglobaladdr:$off, $addr)>; -def : Pat<(i32 (extloadi16 (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_U_I32 0, tglobaladdr:$off, $addr)>; -def : Pat<(i64 (extloadi8 (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_U_I64 0, tglobaladdr:$off, $addr)>; -def : Pat<(i64 (extloadi16 (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_U_I64 0, tglobaladdr:$off, $addr)>; -def : Pat<(i64 (extloadi32 (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD32_U_I64 0, tglobaladdr:$off, $addr)>; -def : Pat<(i32 (extloadi8 (add I32:$addr, - (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_U_I32 0, texternalsym:$off, $addr)>; -def : Pat<(i32 (extloadi16 (add I32:$addr, - (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_U_I32 0, texternalsym:$off, $addr)>; -def : Pat<(i64 (extloadi8 (add I32:$addr, - (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_U_I64 0, texternalsym:$off, $addr)>; -def : Pat<(i64 (extloadi16 (add I32:$addr, - (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_U_I64 0, texternalsym:$off, $addr)>; -def : Pat<(i64 (extloadi32 (add I32:$addr, - (WebAssemblywrapper texternalsym:$off)))), - (LOAD32_U_I64 0, texternalsym:$off, $addr)>; +def : LoadPatImmOff<i32, extloadi8, regPlusImm, LOAD8_U_I32>; +def : LoadPatImmOff<i32, extloadi16, regPlusImm, LOAD16_U_I32>; +def : LoadPatImmOff<i64, extloadi8, regPlusImm, LOAD8_U_I64>; +def : LoadPatImmOff<i64, extloadi16, regPlusImm, LOAD16_U_I64>; +def : LoadPatImmOff<i64, extloadi32, regPlusImm, LOAD32_U_I64>; +def : LoadPatImmOff<i32, extloadi8, or_is_add, LOAD8_U_I32>; +def : LoadPatImmOff<i32, extloadi16, or_is_add, LOAD16_U_I32>; +def : LoadPatImmOff<i64, extloadi8, or_is_add, 
LOAD8_U_I64>; +def : LoadPatImmOff<i64, extloadi16, or_is_add, LOAD16_U_I64>; +def : LoadPatImmOff<i64, extloadi32, or_is_add, LOAD32_U_I64>; +def : LoadPatGlobalAddr<i32, extloadi8, LOAD8_U_I32>; +def : LoadPatGlobalAddr<i32, extloadi16, LOAD16_U_I32>; +def : LoadPatGlobalAddr<i64, extloadi8, LOAD8_U_I64>; +def : LoadPatGlobalAddr<i64, extloadi16, LOAD16_U_I64>; +def : LoadPatGlobalAddr<i64, extloadi32, LOAD32_U_I64>; +def : LoadPatExternalSym<i32, extloadi8, LOAD8_U_I32>; +def : LoadPatExternalSym<i32, extloadi16, LOAD16_U_I32>; +def : LoadPatExternalSym<i64, extloadi8, LOAD8_U_I64>; +def : LoadPatExternalSym<i64, extloadi16, LOAD16_U_I64>; +def : LoadPatExternalSym<i64, extloadi32, LOAD32_U_I64>; // Select "don't care" extending loads with just a constant offset. -def : Pat<(i32 (extloadi8 imm:$off)), - (LOAD8_U_I32 0, imm:$off, (CONST_I32 0))>; -def : Pat<(i32 (extloadi16 imm:$off)), - (LOAD16_U_I32 0, imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (extloadi8 imm:$off)), - (LOAD8_U_I64 0, imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (extloadi16 imm:$off)), - (LOAD16_U_I64 0, imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (extloadi32 imm:$off)), - (LOAD32_U_I64 0, imm:$off, (CONST_I32 0))>; -def : Pat<(i32 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_U_I32 0, tglobaladdr:$off, (CONST_I32 0))>; -def : Pat<(i32 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_U_I32 0, tglobaladdr:$off, (CONST_I32 0))>; -def : Pat<(i64 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>; -def : Pat<(i64 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>; -def : Pat<(i64 (extloadi32 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD32_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>; -def : Pat<(i32 (extloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_U_I32 0, texternalsym:$off, (CONST_I32 0))>; -def : Pat<(i32 (extloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_U_I32 0, texternalsym:$off, (CONST_I32 0))>; -def : Pat<(i64 (extloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_U_I64 0, texternalsym:$off, (CONST_I32 0))>; -def : Pat<(i64 (extloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_U_I64 0, texternalsym:$off, (CONST_I32 0))>; -def : Pat<(i64 (extloadi32 (WebAssemblywrapper texternalsym:$off))), - (LOAD32_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>; +def : LoadPatOffsetOnly<i32, extloadi8, LOAD8_U_I32>; +def : LoadPatOffsetOnly<i32, extloadi16, LOAD16_U_I32>; +def : LoadPatOffsetOnly<i64, extloadi8, LOAD8_U_I64>; +def : LoadPatOffsetOnly<i64, extloadi16, LOAD16_U_I64>; +def : LoadPatOffsetOnly<i64, extloadi32, LOAD32_U_I64>; +def : LoadPatGlobalAddrOffOnly<i32, extloadi8, LOAD8_U_I32>; +def : LoadPatGlobalAddrOffOnly<i32, extloadi16, LOAD16_U_I32>; +def : LoadPatGlobalAddrOffOnly<i64, extloadi8, LOAD8_U_I64>; +def : LoadPatGlobalAddrOffOnly<i64, extloadi16, LOAD16_U_I64>; +def : LoadPatGlobalAddrOffOnly<i64, extloadi32, LOAD32_U_I64>; +def : LoadPatExternSymOffOnly<i32, extloadi8, LOAD8_U_I32>; +def : LoadPatExternSymOffOnly<i32, extloadi16, LOAD16_U_I32>; +def : LoadPatExternSymOffOnly<i64, extloadi8, LOAD8_U_I64>; +def : LoadPatExternSymOffOnly<i64, extloadi16, LOAD16_U_I64>; +def : LoadPatExternSymOffOnly<i64, extloadi32, LOAD32_U_I64>; + let Defs = [ARGUMENTS] in { diff --git a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp index 576b71dd7966..5b867aa763a1 100644 --- 
a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp +++ b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp @@ -99,6 +99,13 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) { case NE_F32: Def->setDesc(TII.get(EQ_F32)); Inverted = true; break; case EQ_F64: Def->setDesc(TII.get(NE_F64)); Inverted = true; break; case NE_F64: Def->setDesc(TII.get(EQ_F64)); Inverted = true; break; + case EQZ_I32: { + // Invert an eqz by replacing it with its operand. + Cond = Def->getOperand(1).getReg(); + Def->eraseFromParent(); + Inverted = true; + break; + } default: break; } } diff --git a/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp b/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp new file mode 100644 index 000000000000..0020817aee41 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp @@ -0,0 +1,191 @@ +//===-- WebAssemblyLowerGlobalDtors.cpp - Lower @llvm.global_dtors --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Lower @llvm.global_dtors. +/// +/// WebAssembly doesn't have a builtin way to invoke static destructors. +/// Implement @llvm.global_dtors by creating wrapper functions that are +/// registered in @llvm.global_ctors and which contain a call to +/// `__cxa_atexit` to register their destructor functions. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Pass.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-lower-global-dtors" + +namespace { +class LowerGlobalDtors final : public ModulePass { + StringRef getPassName() const override { + return "WebAssembly Lower @llvm.global_dtors"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + ModulePass::getAnalysisUsage(AU); + } + + bool runOnModule(Module &M) override; + +public: + static char ID; + LowerGlobalDtors() : ModulePass(ID) {} +}; +} // End anonymous namespace + +char LowerGlobalDtors::ID = 0; +ModulePass *llvm::createWebAssemblyLowerGlobalDtors() { + return new LowerGlobalDtors(); +} + +bool LowerGlobalDtors::runOnModule(Module &M) { + GlobalVariable *GV = M.getGlobalVariable("llvm.global_dtors"); + if (!GV) + return false; + + const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); + if (!InitList) + return false; + + // Sanity-check @llvm.global_dtor's type. + StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType()); + if (!ETy || ETy->getNumElements() != 3 || + !ETy->getTypeAtIndex(0U)->isIntegerTy() || + !ETy->getTypeAtIndex(1U)->isPointerTy() || + !ETy->getTypeAtIndex(2U)->isPointerTy()) + return false; // Not (int, ptr, ptr). + + // Collect the contents of @llvm.global_dtors, collated by priority and + // associated symbol. + std::map<uint16_t, MapVector<Constant *, std::vector<Constant *> > > DtorFuncs; + for (Value *O : InitList->operands()) { + ConstantStruct *CS = dyn_cast<ConstantStruct>(O); + if (!CS) continue; // Malformed. 
+ + ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0)); + if (!Priority) continue; // Malformed. + uint16_t PriorityValue = Priority->getLimitedValue(UINT16_MAX); + + Constant *DtorFunc = CS->getOperand(1); + if (DtorFunc->isNullValue()) + break; // Found a null terminator, skip the rest. + + Constant *Associated = CS->getOperand(2); + Associated = cast<Constant>(Associated->stripPointerCastsNoFollowAliases()); + + DtorFuncs[PriorityValue][Associated].push_back(DtorFunc); + } + if (DtorFuncs.empty()) + return false; + + // extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d); + LLVMContext &C = M.getContext(); + PointerType *VoidStar = Type::getInt8PtrTy(C); + Type *AtExitFuncArgs[] = { VoidStar }; + FunctionType *AtExitFuncTy = FunctionType::get( + Type::getVoidTy(C), + AtExitFuncArgs, + /*isVarArg=*/false); + + Type *AtExitArgs[] = { + PointerType::get(AtExitFuncTy, 0), + VoidStar, + VoidStar + }; + FunctionType *AtExitTy = FunctionType::get( + Type::getInt32Ty(C), + AtExitArgs, + /*isVarArg=*/false); + Constant *AtExit = M.getOrInsertFunction("__cxa_atexit", AtExitTy); + + // Declare __dso_local. + Constant *DsoHandle = M.getNamedValue("__dso_handle"); + if (!DsoHandle) { + Type *DsoHandleTy = Type::getInt8Ty(C); + GlobalVariable *Handle = + new GlobalVariable(M, DsoHandleTy, /*isConstant=*/true, + GlobalVariable::ExternalWeakLinkage, + nullptr, "__dso_handle"); + Handle->setVisibility(GlobalVariable::HiddenVisibility); + DsoHandle = Handle; + } + + // For each unique priority level and associated symbol, generate a function + // to call all the destructors at that level, and a function to register the + // first function with __cxa_atexit. + for (auto &PriorityAndMore : DtorFuncs) { + uint16_t Priority = PriorityAndMore.first; + for (auto &AssociatedAndMore : PriorityAndMore.second) { + Constant *Associated = AssociatedAndMore.first; + + Function *CallDtors = Function::Create( + AtExitFuncTy, Function::PrivateLinkage, + "call_dtors" + + (Priority != UINT16_MAX ? + (Twine(".") + Twine(Priority)) : Twine()) + + (!Associated->isNullValue() ? + (Twine(".") + Associated->getName()) : Twine()), + &M); + BasicBlock *BB = BasicBlock::Create(C, "body", CallDtors); + + for (auto Dtor : AssociatedAndMore.second) + CallInst::Create(Dtor, "", BB); + ReturnInst::Create(C, BB); + + FunctionType *VoidVoid = FunctionType::get(Type::getVoidTy(C), + /*isVarArg=*/false); + Function *RegisterCallDtors = Function::Create( + VoidVoid, Function::PrivateLinkage, + "register_call_dtors" + + (Priority != UINT16_MAX ? + (Twine(".") + Twine(Priority)) : Twine()) + + (!Associated->isNullValue() ? + (Twine(".") + Associated->getName()) : Twine()), + &M); + BasicBlock *EntryBB = BasicBlock::Create(C, "entry", RegisterCallDtors); + BasicBlock *FailBB = BasicBlock::Create(C, "fail", RegisterCallDtors); + BasicBlock *RetBB = BasicBlock::Create(C, "return", RegisterCallDtors); + + Value *Null = ConstantPointerNull::get(VoidStar); + Value *Args[] = { CallDtors, Null, DsoHandle }; + Value *Res = CallInst::Create(AtExit, Args, "call", EntryBB); + Value *Cmp = new ICmpInst(*EntryBB, ICmpInst::ICMP_NE, Res, + Constant::getNullValue(Res->getType())); + BranchInst::Create(FailBB, RetBB, Cmp, EntryBB); + + // If `__cxa_atexit` hits out-of-memory, trap, so that we don't misbehave. + // This should be very rare, because if the process is running out of memory + // before main has even started, something is wrong. 
+ CallInst::Create(Intrinsic::getDeclaration(&M, Intrinsic::trap), + "", FailBB); + new UnreachableInst(C, FailBB); + + ReturnInst::Create(C, RetBB); + + // Now register the registration function with @llvm.global_ctors. + appendToGlobalCtors(M, RegisterCallDtors, Priority, Associated); + } + } + + // Now that we've lowered everything, remove @llvm.global_dtors. + GV->eraseFromParent(); + + return true; +} diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp index 8880539804ca..4a93d4810c7d 100644 --- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -43,7 +43,7 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { if (const auto *FuncTy = dyn_cast<FunctionType>(Global->getValueType())) { const MachineFunction &MF = *MO.getParent()->getParent()->getParent(); const TargetMachine &TM = MF.getTarget(); - const Function &CurrentFunc = *MF.getFunction(); + const Function &CurrentFunc = MF.getFunction(); SmallVector<wasm::ValType, 4> Returns; SmallVector<wasm::ValType, 4> Params; diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp index 5a3a7411ed46..ebe97848d461 100644 --- a/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp +++ b/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp @@ -15,14 +15,14 @@ /// have multiple defs, and then they do, the defs are usually closely related. /// Later, after coalescing, tail duplication, and other optimizations, it's /// more common to see registers with multiple unrelated defs. This pass -/// updates LiveIntervalAnalysis to distribute the value numbers across separate +/// updates LiveIntervals to distribute the value numbers across separate /// LiveIntervals. /// //===----------------------------------------------------------------------===// #include "WebAssembly.h" #include "WebAssemblySubtarget.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" diff --git a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp index 1462c49aa9fd..3a2876bfcde2 100644 --- a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp +++ b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp @@ -117,7 +117,7 @@ bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(MachineFunction &M } } - // Ok, we're now ready to run LiveIntervalAnalysis again. + // Ok, we're now ready to run the LiveIntervals analysis again. 
MF.getProperties().set(MachineFunctionProperties::Property::TracksLiveness); return Changed; diff --git a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp index ba39b6cdb568..2ac3a839c3c8 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp @@ -19,7 +19,7 @@ #include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index ea9e3fa862ce..2bdba96ab674 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -26,7 +26,7 @@ #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -107,12 +107,12 @@ static void ConvertImplicitDefToConstZero(MachineInstr *MI, } else if (RegClass == &WebAssembly::F32RegClass) { MI->setDesc(TII->get(WebAssembly::CONST_F32)); ConstantFP *Val = cast<ConstantFP>(Constant::getNullValue( - Type::getFloatTy(MF.getFunction()->getContext()))); + Type::getFloatTy(MF.getFunction().getContext()))); MI->addOperand(MachineOperand::CreateFPImm(Val)); } else if (RegClass == &WebAssembly::F64RegClass) { MI->setDesc(TII->get(WebAssembly::CONST_F64)); ConstantFP *Val = cast<ConstantFP>(Constant::getNullValue( - Type::getDoubleTy(MF.getFunction()->getContext()))); + Type::getDoubleTy(MF.getFunction().getContext()))); MI->addOperand(MachineOperand::CreateFPImm(Val)); } else { llvm_unreachable("Unexpected reg class"); diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp index 9367464c806e..5e7ebd19fac7 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -24,7 +24,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Function.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; diff --git a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp index b1385f409fd3..c4b9e915b41e 100644 --- a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp +++ b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp @@ -96,6 +96,13 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) { case WebAssembly::LOAD16_U_I64: case WebAssembly::LOAD32_S_I64: case WebAssembly::LOAD32_U_I64: + case WebAssembly::ATOMIC_LOAD_I32: + case WebAssembly::ATOMIC_LOAD8_U_I32: + case WebAssembly::ATOMIC_LOAD16_U_I32: + case WebAssembly::ATOMIC_LOAD_I64: + case WebAssembly::ATOMIC_LOAD8_U_I64: + case WebAssembly::ATOMIC_LOAD16_U_I64: + case WebAssembly::ATOMIC_LOAD32_U_I64: RewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo); break; case WebAssembly::STORE_I32: diff --git a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp 
b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp index 8173364fa880..22a5a9099e72 100644 --- a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp +++ b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp @@ -29,7 +29,7 @@ #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp index ce39051b0555..9e122a5f1574 100644 --- a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp +++ b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -41,7 +41,8 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT, const std::string &FS, const TargetMachine &TM) : WebAssemblyGenSubtargetInfo(TT, CPU, FS), HasSIMD128(false), - CPUString(CPU), TargetTriple(TT), FrameLowering(), + HasAtomics(false), HasNontrappingFPToInt(false), CPUString(CPU), + TargetTriple(TT), FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(), TLInfo(TM, *this) {} diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.h b/lib/Target/WebAssembly/WebAssemblySubtarget.h index f530a290fa0e..a6bf0b6d54f6 100644 --- a/lib/Target/WebAssembly/WebAssemblySubtarget.h +++ b/lib/Target/WebAssembly/WebAssemblySubtarget.h @@ -20,7 +20,7 @@ #include "WebAssemblyISelLowering.h" #include "WebAssemblyInstrInfo.h" #include "WebAssemblySelectionDAGInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include <string> #define GET_SUBTARGETINFO_HEADER @@ -30,6 +30,8 @@ namespace llvm { class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { bool HasSIMD128; + bool HasAtomics; + bool HasNontrappingFPToInt; /// String name of used CPU. std::string CPUString; @@ -74,6 +76,8 @@ public: // Predicates used by WebAssemblyInstrInfo.td. bool hasAddr64() const { return TargetTriple.isArch64Bit(); } bool hasSIMD128() const { return HasSIMD128; } + bool hasAtomics() const { return HasAtomics; } + bool hasNontrappingFPToInt() const { return HasNontrappingFPToInt; } /// Parses features string setting specified subtarget options. Definition of /// function is auto generated by tblgen. diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 7b05f671bdcb..3cc19ef5fbab 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -68,12 +68,12 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) { WebAssemblyTargetMachine::WebAssemblyTargetMachine( const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional<Reloc::Model> RM, - CodeModel::Model CM, CodeGenOpt::Level OL) + Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine(T, TT.isArch64Bit() ? "e-m:e-p:64:64-i64:64-n32:64-S128" : "e-m:e-p:32:32-i64:64-n32:64-S128", TT, CPU, FS, Options, getEffectiveRelocModel(RM), - CM, OL), + CM ? *CM : CodeModel::Large, OL), TLOF(TT.isOSBinFormatELF() ? static_cast<TargetLoweringObjectFile*>( new WebAssemblyTargetObjectFileELF()) : @@ -175,6 +175,9 @@ void WebAssemblyPassConfig::addIRPasses() { // control specifically what gets lowered. 
addPass(createAtomicExpandPass()); + // Lower .llvm.global_dtors into .llvm_global_ctors with __cxa_atexit calls. + addPass(createWebAssemblyLowerGlobalDtors()); + // Fix function bitcasts, as WebAssembly requires caller and callee signatures // to match. addPass(createWebAssemblyFixFunctionBitcasts()); diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.h b/lib/Target/WebAssembly/WebAssemblyTargetMachine.h index 52a2ef78736a..224849526514 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.h +++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.h @@ -28,8 +28,9 @@ class WebAssemblyTargetMachine final : public LLVMTargetMachine { public: WebAssemblyTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional<Reloc::Model> RM, CodeModel::Model CM, - CodeGenOpt::Level OL); + Optional<Reloc::Model> RM, + Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, + bool JIT); ~WebAssemblyTargetMachine() override; const WebAssemblySubtarget * diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index b3ce4bd27460..2e002781f43d 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -14,8 +14,8 @@ //===----------------------------------------------------------------------===// #include "WebAssemblyTargetTransformInfo.h" +#include "llvm/CodeGen/CostTable.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/CostTable.h" using namespace llvm; #define DEBUG_TYPE "wasmtti" diff --git a/lib/Target/WebAssembly/known_gcc_test_failures.txt b/lib/Target/WebAssembly/known_gcc_test_failures.txt index 35a67134775a..2eb73befc50b 100644 --- a/lib/Target/WebAssembly/known_gcc_test_failures.txt +++ b/lib/Target/WebAssembly/known_gcc_test_failures.txt @@ -11,8 +11,11 @@ # to wasm object files (.o). # Computed gotos are not supported (Cannot select BlockAddress/BRIND) +20071220-1.c wasm-o,O0 20040302-1.c +20041214-1.c O0 20071210-1.c +20071220-1.c wasm-s,O0 920501-4.c 920501-5.c comp-goto-1.c @@ -74,17 +77,5 @@ pr41935.c pr28865.c widechar-2.c -# crash: Running pass 'WebAssembly Explicit Locals' on function -20020107-1.c wasm-o -20030222-1.c wasm-o -20071220-1.c wasm-o -20071220-2.c wasm-o -990130-1.c wasm-o -pr38533.c wasm-o -pr41239.c wasm-o -pr43385.c wasm-o -pr43560.c wasm-o -pr45695.c wasm-o -pr49279.c wasm-o -pr49390.c wasm-o -pr52286.c wasm-o +# Untriaged: Assertion failure in WasmObjectWriter::applyRelocations +20071220-2.c wasm-o,O0 |
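Note on the new WebAssemblyLowerGlobalDtors pass added in this patch: the file comment above explains that wasm has no built-in static-destructor mechanism, so the pass rewrites @llvm.global_dtors into constructor-registered calls to __cxa_atexit. As a rough, non-authoritative illustration only (not part of the commit; the wrapper and destructor names below are made-up placeholders), the C++ equivalent of what the pass synthesizes for one (priority, associated symbol) bucket looks roughly like this:

  extern "C" int __cxa_atexit(void (*func)(void *), void *arg, void *dso);
  extern "C" void *__dso_handle;     // declared weak/hidden in the generated IR

  static void dtor_a() {}            // placeholders for entries collected
  static void dtor_b() {}            // from @llvm.global_dtors

  // Wrapper that runs the collected destructors; it takes void* so its type
  // matches the handler signature __cxa_atexit expects.
  static void call_dtors_example(void * /*unused*/) {
    dtor_a();
    dtor_b();
  }

  // Registration function; the pass appends it to @llvm.global_ctors so it
  // runs at startup. A non-zero return from __cxa_atexit (e.g. out of memory)
  // is treated as fatal, matching the trap on the "fail" path above.
  static void register_call_dtors_example() {
    if (__cxa_atexit(&call_dtors_example, nullptr, &__dso_handle) != 0)
      __builtin_trap();
  }

The design choice mirrors what C++ front ends already do for destructors of static objects: deferring to __cxa_atexit keeps teardown ordering in the hands of the runtime rather than requiring a wasm-specific mechanism.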

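Similarly, the WebAssemblyInstrConv.td hunk distinguishes the saturating i32/i64.trunc_*:sat opcodes (selected when nontrapping-fptoint is enabled) from custom-inserter pseudo-instructions used otherwise. As a hedged sketch of the saturating semantics defined by the WebAssembly nontrapping float-to-int proposal (the helper name is invented for the example), the signed f32-to-i32 case behaves approximately like:

  #include <cstdint>
  #include <limits>

  // Approximate semantics of i32.trunc_s:sat/f32: NaN maps to 0 and
  // out-of-range inputs clamp to the i32 range instead of trapping.
  int32_t trunc_s_sat_f32(float x) {
    if (x != x)
      return 0;                                              // NaN
    if (x <= static_cast<float>(std::numeric_limits<int32_t>::min()))
      return std::numeric_limits<int32_t>::min();            // clamp low
    if (x >= static_cast<float>(std::numeric_limits<int32_t>::max()))
      return std::numeric_limits<int32_t>::max();            // clamp high
    return static_cast<int32_t>(x);                          // in range
  }

Without the feature, the FP_TO_SINT/FP_TO_UINT pseudo-instructions above let the backend expand the conversion in a custom inserter so the plain, trapping i32.trunc_s/f32-style opcodes are only reached with inputs they can accept.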