Diffstat (limited to 'llvm/lib/Target/VE')
40 files changed, 7904 insertions, 471 deletions
diff --git a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp new file mode 100644 index 0000000000000..7a899b4b38e2a --- /dev/null +++ b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp @@ -0,0 +1,1454 @@ +//===-- VEAsmParser.cpp - Parse VE assembly to MCInst instructions --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/VEMCExpr.h" +#include "MCTargetDesc/VEMCTargetDesc.h" +#include "TargetInfo/VETargetInfo.h" +#include "VE.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <memory> + +using namespace llvm; + +#define DEBUG_TYPE "ve-asmparser" + +namespace { + +class VEOperand; + +class VEAsmParser : public MCTargetAsmParser { + MCAsmParser &Parser; + + /// @name Auto-generated Match Functions + /// { + +#define GET_ASSEMBLER_HEADER +#include "VEGenAsmMatcher.inc" + + /// } + + // public interface of the MCTargetAsmParser. + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) override; + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; + int parseRegisterName(unsigned (*matchFn)(StringRef)); + OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) override; + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override; + bool ParseDirective(AsmToken DirectiveID) override; + + unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, + unsigned Kind) override; + + // Custom parse functions for VE specific operands. + OperandMatchResultTy parseMEMOperand(OperandVector &Operands); + OperandMatchResultTy parseMEMAsOperand(OperandVector &Operands); + OperandMatchResultTy parseCCOpOperand(OperandVector &Operands); + OperandMatchResultTy parseRDOpOperand(OperandVector &Operands); + OperandMatchResultTy parseMImmOperand(OperandVector &Operands); + OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Name); + OperandMatchResultTy parseVEAsmOperand(std::unique_ptr<VEOperand> &Operand); + + // Helper function to parse expression with a symbol. 
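+  // These handle relocation modifiers such as @hi32/@lo32 in operands
+  // (e.g., illustratively, "lea %s0, sym@lo32"); see parseExpression()
+  // and extractModifierFromExpr() below.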
+ const MCExpr *extractModifierFromExpr(const MCExpr *E, + VEMCExpr::VariantKind &Variant); + const MCExpr *fixupVariantKind(const MCExpr *E); + bool parseExpression(const MCExpr *&EVal); + + // Split the mnemonic stripping conditional code and quantifiers + StringRef splitMnemonic(StringRef Name, SMLoc NameLoc, + OperandVector *Operands); + +public: + VEAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser, + const MCInstrInfo &MII, const MCTargetOptions &Options) + : MCTargetAsmParser(Options, sti, MII), Parser(parser) { + // Initialize the set of available features. + setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); + } +}; + +} // end anonymous namespace + +static const MCPhysReg I32Regs[64] = { + VE::SW0, VE::SW1, VE::SW2, VE::SW3, VE::SW4, VE::SW5, VE::SW6, + VE::SW7, VE::SW8, VE::SW9, VE::SW10, VE::SW11, VE::SW12, VE::SW13, + VE::SW14, VE::SW15, VE::SW16, VE::SW17, VE::SW18, VE::SW19, VE::SW20, + VE::SW21, VE::SW22, VE::SW23, VE::SW24, VE::SW25, VE::SW26, VE::SW27, + VE::SW28, VE::SW29, VE::SW30, VE::SW31, VE::SW32, VE::SW33, VE::SW34, + VE::SW35, VE::SW36, VE::SW37, VE::SW38, VE::SW39, VE::SW40, VE::SW41, + VE::SW42, VE::SW43, VE::SW44, VE::SW45, VE::SW46, VE::SW47, VE::SW48, + VE::SW49, VE::SW50, VE::SW51, VE::SW52, VE::SW53, VE::SW54, VE::SW55, + VE::SW56, VE::SW57, VE::SW58, VE::SW59, VE::SW60, VE::SW61, VE::SW62, + VE::SW63}; + +static const MCPhysReg F32Regs[64] = { + VE::SF0, VE::SF1, VE::SF2, VE::SF3, VE::SF4, VE::SF5, VE::SF6, + VE::SF7, VE::SF8, VE::SF9, VE::SF10, VE::SF11, VE::SF12, VE::SF13, + VE::SF14, VE::SF15, VE::SF16, VE::SF17, VE::SF18, VE::SF19, VE::SF20, + VE::SF21, VE::SF22, VE::SF23, VE::SF24, VE::SF25, VE::SF26, VE::SF27, + VE::SF28, VE::SF29, VE::SF30, VE::SF31, VE::SF32, VE::SF33, VE::SF34, + VE::SF35, VE::SF36, VE::SF37, VE::SF38, VE::SF39, VE::SF40, VE::SF41, + VE::SF42, VE::SF43, VE::SF44, VE::SF45, VE::SF46, VE::SF47, VE::SF48, + VE::SF49, VE::SF50, VE::SF51, VE::SF52, VE::SF53, VE::SF54, VE::SF55, + VE::SF56, VE::SF57, VE::SF58, VE::SF59, VE::SF60, VE::SF61, VE::SF62, + VE::SF63}; + +static const MCPhysReg F128Regs[32] = { + VE::Q0, VE::Q1, VE::Q2, VE::Q3, VE::Q4, VE::Q5, VE::Q6, VE::Q7, + VE::Q8, VE::Q9, VE::Q10, VE::Q11, VE::Q12, VE::Q13, VE::Q14, VE::Q15, + VE::Q16, VE::Q17, VE::Q18, VE::Q19, VE::Q20, VE::Q21, VE::Q22, VE::Q23, + VE::Q24, VE::Q25, VE::Q26, VE::Q27, VE::Q28, VE::Q29, VE::Q30, VE::Q31}; + +static const MCPhysReg MISCRegs[31] = { + VE::USRCC, VE::PSW, VE::SAR, VE::NoRegister, + VE::NoRegister, VE::NoRegister, VE::NoRegister, VE::PMMR, + VE::PMCR0, VE::PMCR1, VE::PMCR2, VE::PMCR3, + VE::NoRegister, VE::NoRegister, VE::NoRegister, VE::NoRegister, + VE::PMC0, VE::PMC1, VE::PMC2, VE::PMC3, + VE::PMC4, VE::PMC5, VE::PMC6, VE::PMC7, + VE::PMC8, VE::PMC9, VE::PMC10, VE::PMC11, + VE::PMC12, VE::PMC13, VE::PMC14}; + +namespace { + +/// VEOperand - Instances of this class represent a parsed VE machine +/// instruction. +class VEOperand : public MCParsedAsmOperand { +private: + enum KindTy { + k_Token, + k_Register, + k_Immediate, + // SX-Aurora ASX form is disp(index, base). + k_MemoryRegRegImm, // base=reg, index=reg, disp=imm + k_MemoryRegImmImm, // base=reg, index=imm, disp=imm + k_MemoryZeroRegImm, // base=0, index=reg, disp=imm + k_MemoryZeroImmImm, // base=0, index=imm, disp=imm + // SX-Aurora AS form is disp(base). 
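+    // For example (illustrative, not from this patch), "16(%s3)" is an AS
+    // operand (disp=16, base=%s3), while "8(%s1, %s2)" is an ASX operand
+    // per the form above (disp=8, index=%s1, base=%s2).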
+ k_MemoryRegImm, // base=reg, disp=imm + k_MemoryZeroImm, // base=0, disp=imm + // Other special cases for Aurora VE + k_CCOp, // condition code + k_RDOp, // rounding mode + k_MImmOp, // Special immediate value of sequential bit stream of 0 or 1. + } Kind; + + SMLoc StartLoc, EndLoc; + + struct Token { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNum; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct MemOp { + unsigned Base; + unsigned IndexReg; + const MCExpr *Index; + const MCExpr *Offset; + }; + + struct CCOp { + unsigned CCVal; + }; + + struct RDOp { + unsigned RDVal; + }; + + struct MImmOp { + const MCExpr *Val; + bool M0Flag; + }; + + union { + struct Token Tok; + struct RegOp Reg; + struct ImmOp Imm; + struct MemOp Mem; + struct CCOp CC; + struct RDOp RD; + struct MImmOp MImm; + }; + +public: + VEOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + + bool isToken() const override { return Kind == k_Token; } + bool isReg() const override { return Kind == k_Register; } + bool isImm() const override { return Kind == k_Immediate; } + bool isMem() const override { + return isMEMrri() || isMEMrii() || isMEMzri() || isMEMzii() || isMEMri() || + isMEMzi(); + } + bool isMEMrri() const { return Kind == k_MemoryRegRegImm; } + bool isMEMrii() const { return Kind == k_MemoryRegImmImm; } + bool isMEMzri() const { return Kind == k_MemoryZeroRegImm; } + bool isMEMzii() const { return Kind == k_MemoryZeroImmImm; } + bool isMEMri() const { return Kind == k_MemoryRegImm; } + bool isMEMzi() const { return Kind == k_MemoryZeroImm; } + bool isCCOp() const { return Kind == k_CCOp; } + bool isRDOp() const { return Kind == k_RDOp; } + bool isZero() { + if (!isImm()) + return false; + + // Constant case + if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(Imm.Val)) { + int64_t Value = ConstExpr->getValue(); + return Value == 0; + } + return false; + } + bool isUImm0to2() { + if (!isImm()) + return false; + + // Constant case + if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(Imm.Val)) { + int64_t Value = ConstExpr->getValue(); + return Value >= 0 && Value < 3; + } + return false; + } + bool isUImm1() { + if (!isImm()) + return false; + + // Constant case + if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(Imm.Val)) { + int64_t Value = ConstExpr->getValue(); + return isUInt<1>(Value); + } + return false; + } + bool isUImm2() { + if (!isImm()) + return false; + + // Constant case + if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(Imm.Val)) { + int64_t Value = ConstExpr->getValue(); + return isUInt<2>(Value); + } + return false; + } + bool isUImm3() { + if (!isImm()) + return false; + + // Constant case + if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(Imm.Val)) { + int64_t Value = ConstExpr->getValue(); + return isUInt<3>(Value); + } + return false; + } + bool isUImm6() { + if (!isImm()) + return false; + + // Constant case + if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(Imm.Val)) { + int64_t Value = ConstExpr->getValue(); + return isUInt<6>(Value); + } + return false; + } + bool isUImm7() { + if (!isImm()) + return false; + + // Constant case + if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(Imm.Val)) { + int64_t Value = ConstExpr->getValue(); + return isUInt<7>(Value); + } + return false; + } + bool isSImm7() { + if (!isImm()) + return false; + + // Constant case + if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(Imm.Val)) { + int64_t Value = ConstExpr->getValue(); + return isInt<7>(Value); + } + return false; + } + bool isMImm() 
const { + if (Kind != k_MImmOp) + return false; + + // Constant case + if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(MImm.Val)) { + int64_t Value = ConstExpr->getValue(); + return isUInt<6>(Value); + } + return false; + } + + StringRef getToken() const { + assert(Kind == k_Token && "Invalid access!"); + return StringRef(Tok.Data, Tok.Length); + } + + unsigned getReg() const override { + assert((Kind == k_Register) && "Invalid access!"); + return Reg.RegNum; + } + + const MCExpr *getImm() const { + assert((Kind == k_Immediate) && "Invalid access!"); + return Imm.Val; + } + + unsigned getMemBase() const { + assert((Kind == k_MemoryRegRegImm || Kind == k_MemoryRegImmImm || + Kind == k_MemoryRegImm) && + "Invalid access!"); + return Mem.Base; + } + + unsigned getMemIndexReg() const { + assert((Kind == k_MemoryRegRegImm || Kind == k_MemoryZeroRegImm) && + "Invalid access!"); + return Mem.IndexReg; + } + + const MCExpr *getMemIndex() const { + assert((Kind == k_MemoryRegImmImm || Kind == k_MemoryZeroImmImm) && + "Invalid access!"); + return Mem.Index; + } + + const MCExpr *getMemOffset() const { + assert((Kind == k_MemoryRegRegImm || Kind == k_MemoryRegImmImm || + Kind == k_MemoryZeroImmImm || Kind == k_MemoryZeroRegImm || + Kind == k_MemoryRegImm || Kind == k_MemoryZeroImm) && + "Invalid access!"); + return Mem.Offset; + } + + void setMemOffset(const MCExpr *off) { + assert((Kind == k_MemoryRegRegImm || Kind == k_MemoryRegImmImm || + Kind == k_MemoryZeroImmImm || Kind == k_MemoryZeroRegImm || + Kind == k_MemoryRegImm || Kind == k_MemoryZeroImm) && + "Invalid access!"); + Mem.Offset = off; + } + + unsigned getCCVal() const { + assert((Kind == k_CCOp) && "Invalid access!"); + return CC.CCVal; + } + + unsigned getRDVal() const { + assert((Kind == k_RDOp) && "Invalid access!"); + return RD.RDVal; + } + + const MCExpr *getMImmVal() const { + assert((Kind == k_MImmOp) && "Invalid access!"); + return MImm.Val; + } + bool getM0Flag() const { + assert((Kind == k_MImmOp) && "Invalid access!"); + return MImm.M0Flag; + } + + /// getStartLoc - Get the location of the first token of this operand. + SMLoc getStartLoc() const override { return StartLoc; } + /// getEndLoc - Get the location of the last token of this operand. 
+ SMLoc getEndLoc() const override { return EndLoc; } + + void print(raw_ostream &OS) const override { + switch (Kind) { + case k_Token: + OS << "Token: " << getToken() << "\n"; + break; + case k_Register: + OS << "Reg: #" << getReg() << "\n"; + break; + case k_Immediate: + OS << "Imm: " << getImm() << "\n"; + break; + case k_MemoryRegRegImm: + assert(getMemOffset() != nullptr); + OS << "Mem: #" << getMemBase() << "+#" << getMemIndexReg() << "+" + << *getMemOffset() << "\n"; + break; + case k_MemoryRegImmImm: + assert(getMemIndex() != nullptr && getMemOffset() != nullptr); + OS << "Mem: #" << getMemBase() << "+" << *getMemIndex() << "+" + << *getMemOffset() << "\n"; + break; + case k_MemoryZeroRegImm: + assert(getMemOffset() != nullptr); + OS << "Mem: 0+#" << getMemIndexReg() << "+" << *getMemOffset() << "\n"; + break; + case k_MemoryZeroImmImm: + assert(getMemIndex() != nullptr && getMemOffset() != nullptr); + OS << "Mem: 0+" << *getMemIndex() << "+" << *getMemOffset() << "\n"; + break; + case k_MemoryRegImm: + assert(getMemOffset() != nullptr); + OS << "Mem: #" << getMemBase() << "+" << *getMemOffset() << "\n"; + break; + case k_MemoryZeroImm: + assert(getMemOffset() != nullptr); + OS << "Mem: 0+" << *getMemOffset() << "\n"; + break; + case k_CCOp: + OS << "CCOp: " << getCCVal() << "\n"; + break; + case k_RDOp: + OS << "RDOp: " << getRDVal() << "\n"; + break; + case k_MImmOp: + OS << "MImm: (" << getMImmVal() << (getM0Flag() ? ")0" : ")1") << "\n"; + break; + } + } + + void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getReg())); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCExpr *Expr = getImm(); + addExpr(Inst, Expr); + } + + void addZeroOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void addUImm0to2Operands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void addUImm1Operands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void addUImm2Operands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void addUImm3Operands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void addUImm6Operands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void addUImm7Operands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void addSImm7Operands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediate when possible. Null MCExpr = 0. 
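+    // For example (illustrative), the offset of "8(, %s1)" arrives here as
+    // an MCConstantExpr of 8 and is emitted as a plain immediate operand.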
+ if (!Expr) + Inst.addOperand(MCOperand::createImm(0)); + else if (const auto *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::createImm(CE->getValue())); + else + Inst.addOperand(MCOperand::createExpr(Expr)); + } + + void addMEMrriOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::createReg(getMemBase())); + Inst.addOperand(MCOperand::createReg(getMemIndexReg())); + addExpr(Inst, getMemOffset()); + } + + void addMEMriiOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::createReg(getMemBase())); + addExpr(Inst, getMemIndex()); + addExpr(Inst, getMemOffset()); + } + + void addMEMzriOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::createImm(0)); + Inst.addOperand(MCOperand::createReg(getMemIndexReg())); + addExpr(Inst, getMemOffset()); + } + + void addMEMziiOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::createImm(0)); + addExpr(Inst, getMemIndex()); + addExpr(Inst, getMemOffset()); + } + + void addMEMriOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::createReg(getMemBase())); + addExpr(Inst, getMemOffset()); + } + + void addMEMziOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::createImm(0)); + addExpr(Inst, getMemOffset()); + } + + void addCCOpOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::createImm(getCCVal())); + } + + void addRDOpOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::createImm(getRDVal())); + } + + void addMImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const auto *ConstExpr = dyn_cast<MCConstantExpr>(getMImmVal()); + assert(ConstExpr && "Null operands!"); + int64_t Value = ConstExpr->getValue(); + if (getM0Flag()) + Value += 64; + Inst.addOperand(MCOperand::createImm(Value)); + } + + static std::unique_ptr<VEOperand> CreateToken(StringRef Str, SMLoc S) { + auto Op = std::make_unique<VEOperand>(k_Token); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + static std::unique_ptr<VEOperand> CreateReg(unsigned RegNum, SMLoc S, + SMLoc E) { + auto Op = std::make_unique<VEOperand>(k_Register); + Op->Reg.RegNum = RegNum; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static std::unique_ptr<VEOperand> CreateImm(const MCExpr *Val, SMLoc S, + SMLoc E) { + auto Op = std::make_unique<VEOperand>(k_Immediate); + Op->Imm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static std::unique_ptr<VEOperand> CreateCCOp(unsigned CCVal, SMLoc S, + SMLoc E) { + auto Op = std::make_unique<VEOperand>(k_CCOp); + Op->CC.CCVal = CCVal; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static std::unique_ptr<VEOperand> CreateRDOp(unsigned RDVal, SMLoc S, + SMLoc E) { + auto Op = std::make_unique<VEOperand>(k_RDOp); + Op->RD.RDVal = RDVal; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static std::unique_ptr<VEOperand> CreateMImm(const MCExpr *Val, bool Flag, + SMLoc S, SMLoc E) { + auto Op = 
std::make_unique<VEOperand>(k_MImmOp); + Op->MImm.Val = Val; + Op->MImm.M0Flag = Flag; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static bool MorphToI32Reg(VEOperand &Op) { + unsigned Reg = Op.getReg(); + unsigned regIdx = Reg - VE::SX0; + if (regIdx > 63) + return false; + Op.Reg.RegNum = I32Regs[regIdx]; + return true; + } + + static bool MorphToF32Reg(VEOperand &Op) { + unsigned Reg = Op.getReg(); + unsigned regIdx = Reg - VE::SX0; + if (regIdx > 63) + return false; + Op.Reg.RegNum = F32Regs[regIdx]; + return true; + } + + static bool MorphToF128Reg(VEOperand &Op) { + unsigned Reg = Op.getReg(); + unsigned regIdx = Reg - VE::SX0; + if (regIdx % 2 || regIdx > 63) + return false; + Op.Reg.RegNum = F128Regs[regIdx / 2]; + return true; + } + + static bool MorphToMISCReg(VEOperand &Op) { + const auto *ConstExpr = dyn_cast<MCConstantExpr>(Op.getImm()); + if (!ConstExpr) + return false; + unsigned regIdx = ConstExpr->getValue(); + if (regIdx > 31 || MISCRegs[regIdx] == VE::NoRegister) + return false; + Op.Kind = k_Register; + Op.Reg.RegNum = MISCRegs[regIdx]; + return true; + } + + static std::unique_ptr<VEOperand> + MorphToMEMri(unsigned Base, std::unique_ptr<VEOperand> Op) { + const MCExpr *Imm = Op->getImm(); + Op->Kind = k_MemoryRegImm; + Op->Mem.Base = Base; + Op->Mem.IndexReg = 0; + Op->Mem.Index = nullptr; + Op->Mem.Offset = Imm; + return Op; + } + + static std::unique_ptr<VEOperand> + MorphToMEMzi(std::unique_ptr<VEOperand> Op) { + const MCExpr *Imm = Op->getImm(); + Op->Kind = k_MemoryZeroImm; + Op->Mem.Base = 0; + Op->Mem.IndexReg = 0; + Op->Mem.Index = nullptr; + Op->Mem.Offset = Imm; + return Op; + } + + static std::unique_ptr<VEOperand> + MorphToMEMrri(unsigned Base, unsigned Index, std::unique_ptr<VEOperand> Op) { + const MCExpr *Imm = Op->getImm(); + Op->Kind = k_MemoryRegRegImm; + Op->Mem.Base = Base; + Op->Mem.IndexReg = Index; + Op->Mem.Index = nullptr; + Op->Mem.Offset = Imm; + return Op; + } + + static std::unique_ptr<VEOperand> + MorphToMEMrii(unsigned Base, const MCExpr *Index, + std::unique_ptr<VEOperand> Op) { + const MCExpr *Imm = Op->getImm(); + Op->Kind = k_MemoryRegImmImm; + Op->Mem.Base = Base; + Op->Mem.IndexReg = 0; + Op->Mem.Index = Index; + Op->Mem.Offset = Imm; + return Op; + } + + static std::unique_ptr<VEOperand> + MorphToMEMzri(unsigned Index, std::unique_ptr<VEOperand> Op) { + const MCExpr *Imm = Op->getImm(); + Op->Kind = k_MemoryZeroRegImm; + Op->Mem.Base = 0; + Op->Mem.IndexReg = Index; + Op->Mem.Index = nullptr; + Op->Mem.Offset = Imm; + return Op; + } + + static std::unique_ptr<VEOperand> + MorphToMEMzii(const MCExpr *Index, std::unique_ptr<VEOperand> Op) { + const MCExpr *Imm = Op->getImm(); + Op->Kind = k_MemoryZeroImmImm; + Op->Mem.Base = 0; + Op->Mem.IndexReg = 0; + Op->Mem.Index = Index; + Op->Mem.Offset = Imm; + return Op; + } +}; + +} // end anonymous namespace + +bool VEAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, uint64_t &ErrorInfo, + bool MatchingInlineAsm) { + MCInst Inst; + unsigned MatchResult = + MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); + switch (MatchResult) { + case Match_Success: + Inst.setLoc(IDLoc); + Out.emitInstruction(Inst, getSTI()); + return false; + + case Match_MissingFeature: + return Error(IDLoc, + "instruction requires a CPU feature not currently enabled"); + + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0ULL) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few 
operands for instruction"); + + ErrorLoc = ((VEOperand &)*Operands[ErrorInfo]).getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } + + return Error(ErrorLoc, "invalid operand for instruction"); + } + case Match_MnemonicFail: + return Error(IDLoc, "invalid instruction mnemonic"); + } + llvm_unreachable("Implement any new match types added!"); +} + +bool VEAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) { + if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success) + return Error(StartLoc, "invalid register name"); + return false; +} + +/// Parses a register name using a given matching function. +/// Checks for lowercase or uppercase if necessary. +int VEAsmParser::parseRegisterName(unsigned (*matchFn)(StringRef)) { + StringRef Name = Parser.getTok().getString(); + + int RegNum = matchFn(Name); + + // GCC supports case insensitive register names. All of the VE registers + // are all lower case. + if (RegNum == VE::NoRegister) { + RegNum = matchFn(Name.lower()); + } + + return RegNum; +} + +/// Maps from the set of all register names to a register number. +/// \note Generated by TableGen. +static unsigned MatchRegisterName(StringRef Name); + +/// Maps from the set of all alternative registernames to a register number. +/// \note Generated by TableGen. +static unsigned MatchRegisterAltName(StringRef Name); + +OperandMatchResultTy +VEAsmParser::tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { + const AsmToken Tok = Parser.getTok(); + StartLoc = Tok.getLoc(); + EndLoc = Tok.getEndLoc(); + RegNo = 0; + if (getLexer().getKind() != AsmToken::Percent) + return MatchOperand_NoMatch; + Parser.Lex(); + + RegNo = parseRegisterName(&MatchRegisterName); + if (RegNo == VE::NoRegister) + RegNo = parseRegisterName(&MatchRegisterAltName); + + if (RegNo != VE::NoRegister) { + Parser.Lex(); + return MatchOperand_Success; + } + + getLexer().UnLex(Tok); + return MatchOperand_NoMatch; +} + +static StringRef parseCC(StringRef Name, unsigned Prefix, unsigned Suffix, + bool IntegerCC, bool OmitCC, SMLoc NameLoc, + OperandVector *Operands) { + // Parse instructions with a conditional code. For example, 'bne' is + // converted into two operands 'b' and 'ne'. + StringRef Cond = Name.slice(Prefix, Suffix); + VECC::CondCode CondCode = + IntegerCC ? stringToVEICondCode(Cond) : stringToVEFCondCode(Cond); + + // If OmitCC is enabled, CC_AT and CC_AF is treated as a part of mnemonic. + if (CondCode != VECC::UNKNOWN && + (!OmitCC || (CondCode != VECC::CC_AT && CondCode != VECC::CC_AF))) { + StringRef SuffixStr = Name.substr(Suffix); + // Push "b". + Name = Name.slice(0, Prefix); + Operands->push_back(VEOperand::CreateToken(Name, NameLoc)); + // Push $cond part. + SMLoc CondLoc = SMLoc::getFromPointer(NameLoc.getPointer() + Prefix); + SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() + Suffix); + Operands->push_back(VEOperand::CreateCCOp(CondCode, CondLoc, SuffixLoc)); + // push suffix like ".l.t" + if (!SuffixStr.empty()) + Operands->push_back(VEOperand::CreateToken(SuffixStr, SuffixLoc)); + } else { + Operands->push_back(VEOperand::CreateToken(Name, NameLoc)); + } + return Name; +} + +static StringRef parseRD(StringRef Name, unsigned Prefix, SMLoc NameLoc, + OperandVector *Operands) { + // Parse instructions with a conditional code. For example, 'cvt.w.d.sx.rz' + // is converted into two operands 'cvt.w.d.sx' and '.rz'. 
+ StringRef RD = Name.substr(Prefix); + VERD::RoundingMode RoundingMode = stringToVERD(RD); + + if (RoundingMode != VERD::UNKNOWN) { + Name = Name.slice(0, Prefix); + // push 1st like `cvt.w.d.sx` + Operands->push_back(VEOperand::CreateToken(Name, NameLoc)); + SMLoc SuffixLoc = + SMLoc::getFromPointer(NameLoc.getPointer() + (RD.data() - Name.data())); + SMLoc SuffixEnd = + SMLoc::getFromPointer(NameLoc.getPointer() + (RD.end() - Name.data())); + // push $round if it has rounding mode + Operands->push_back( + VEOperand::CreateRDOp(RoundingMode, SuffixLoc, SuffixEnd)); + } else { + Operands->push_back(VEOperand::CreateToken(Name, NameLoc)); + } + return Name; +} + +// Split the mnemonic into ASM operand, conditional code and instruction +// qualifier (half-word, byte). +StringRef VEAsmParser::splitMnemonic(StringRef Name, SMLoc NameLoc, + OperandVector *Operands) { + // Create the leading tokens for the mnemonic + StringRef Mnemonic = Name; + + if (Name[0] == 'b') { + // Match b?? or br??. + size_t Start = 1; + size_t Next = Name.find('.'); + // Adjust position of CondCode. + if (Name.size() > 1 && Name[1] == 'r') + Start = 2; + // Check suffix. + bool ICC = true; + if (Next + 1 < Name.size() && + (Name[Next + 1] == 'd' || Name[Next + 1] == 's')) + ICC = false; + Mnemonic = parseCC(Name, Start, Next, ICC, true, NameLoc, Operands); + } else if (Name.startswith("cmov.l.") || Name.startswith("cmov.w.") || + Name.startswith("cmov.d.") || Name.startswith("cmov.s.")) { + bool ICC = Name[5] == 'l' || Name[5] == 'w'; + Mnemonic = parseCC(Name, 7, Name.size(), ICC, false, NameLoc, Operands); + } else if (Name.startswith("cvt.w.d.sx") || Name.startswith("cvt.w.d.zx") || + Name.startswith("cvt.w.s.sx") || Name.startswith("cvt.w.s.zx")) { + Mnemonic = parseRD(Name, 10, NameLoc, Operands); + } else if (Name.startswith("cvt.l.d")) { + Mnemonic = parseRD(Name, 7, NameLoc, Operands); + } else { + Operands->push_back(VEOperand::CreateToken(Mnemonic, NameLoc)); + } + + return Mnemonic; +} + +static void applyMnemonicAliases(StringRef &Mnemonic, + const FeatureBitset &Features, + unsigned VariantID); + +bool VEAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) { + // If the target architecture uses MnemonicAlias, call it here to parse + // operands correctly. + applyMnemonicAliases(Name, getAvailableFeatures(), 0); + + // Split name to first token and the rest, e.g. "bgt.l.t" to "b", "gt", and + // ".l.t". We treat "b" as a mnemonic, "gt" as first operand, and ".l.t" + // as second operand. + StringRef Mnemonic = splitMnemonic(Name, NameLoc, &Operands); + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + // Read the first operand. + if (parseOperand(Operands, Mnemonic) != MatchOperand_Success) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token"); + } + + while (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); // Eat the comma. + // Parse and remember the operand. + if (parseOperand(Operands, Mnemonic) != MatchOperand_Success) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token"); + } + } + } + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token"); + } + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +bool VEAsmParser::ParseDirective(AsmToken DirectiveID) { + // Let the MC layer to handle other directives. + return true; +} + +/// Extract \code @lo32/@hi32/etc \endcode modifier from expression. 
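+/// For example (illustrative), given "sym@hi32 + 8" this returns "sym + 8"
+/// and sets Variant to VK_VE_HI32.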
+/// Recursively scan the expression and check for VK_VE_HI32/LO32/etc +/// symbol variants. If all symbols with modifier use the same +/// variant, return the corresponding VEMCExpr::VariantKind, +/// and a modified expression using the default symbol variant. +/// Otherwise, return NULL. +const MCExpr * +VEAsmParser::extractModifierFromExpr(const MCExpr *E, + VEMCExpr::VariantKind &Variant) { + MCContext &Context = getParser().getContext(); + Variant = VEMCExpr::VK_VE_None; + + switch (E->getKind()) { + case MCExpr::Target: + case MCExpr::Constant: + return nullptr; + + case MCExpr::SymbolRef: { + const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E); + + switch (SRE->getKind()) { + case MCSymbolRefExpr::VK_None: + // Use VK_VE_REFLONG to a symbol without modifiers. + Variant = VEMCExpr::VK_VE_REFLONG; + break; + case MCSymbolRefExpr::VK_VE_HI32: + Variant = VEMCExpr::VK_VE_HI32; + break; + case MCSymbolRefExpr::VK_VE_LO32: + Variant = VEMCExpr::VK_VE_LO32; + break; + case MCSymbolRefExpr::VK_VE_PC_HI32: + Variant = VEMCExpr::VK_VE_PC_HI32; + break; + case MCSymbolRefExpr::VK_VE_PC_LO32: + Variant = VEMCExpr::VK_VE_PC_LO32; + break; + case MCSymbolRefExpr::VK_VE_GOT_HI32: + Variant = VEMCExpr::VK_VE_GOT_HI32; + break; + case MCSymbolRefExpr::VK_VE_GOT_LO32: + Variant = VEMCExpr::VK_VE_GOT_LO32; + break; + case MCSymbolRefExpr::VK_VE_GOTOFF_HI32: + Variant = VEMCExpr::VK_VE_GOTOFF_HI32; + break; + case MCSymbolRefExpr::VK_VE_GOTOFF_LO32: + Variant = VEMCExpr::VK_VE_GOTOFF_LO32; + break; + case MCSymbolRefExpr::VK_VE_PLT_HI32: + Variant = VEMCExpr::VK_VE_PLT_HI32; + break; + case MCSymbolRefExpr::VK_VE_PLT_LO32: + Variant = VEMCExpr::VK_VE_PLT_LO32; + break; + case MCSymbolRefExpr::VK_VE_TLS_GD_HI32: + Variant = VEMCExpr::VK_VE_TLS_GD_HI32; + break; + case MCSymbolRefExpr::VK_VE_TLS_GD_LO32: + Variant = VEMCExpr::VK_VE_TLS_GD_LO32; + break; + case MCSymbolRefExpr::VK_VE_TPOFF_HI32: + Variant = VEMCExpr::VK_VE_TPOFF_HI32; + break; + case MCSymbolRefExpr::VK_VE_TPOFF_LO32: + Variant = VEMCExpr::VK_VE_TPOFF_LO32; + break; + default: + return nullptr; + } + + return MCSymbolRefExpr::create(&SRE->getSymbol(), Context); + } + + case MCExpr::Unary: { + const MCUnaryExpr *UE = cast<MCUnaryExpr>(E); + const MCExpr *Sub = extractModifierFromExpr(UE->getSubExpr(), Variant); + if (!Sub) + return nullptr; + return MCUnaryExpr::create(UE->getOpcode(), Sub, Context); + } + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast<MCBinaryExpr>(E); + VEMCExpr::VariantKind LHSVariant, RHSVariant; + const MCExpr *LHS = extractModifierFromExpr(BE->getLHS(), LHSVariant); + const MCExpr *RHS = extractModifierFromExpr(BE->getRHS(), RHSVariant); + + if (!LHS && !RHS) + return nullptr; + + if (!LHS) + LHS = BE->getLHS(); + if (!RHS) + RHS = BE->getRHS(); + + if (LHSVariant == VEMCExpr::VK_VE_None) + Variant = RHSVariant; + else if (RHSVariant == VEMCExpr::VK_VE_None) + Variant = LHSVariant; + else if (LHSVariant == RHSVariant) + Variant = LHSVariant; + else + return nullptr; + + return MCBinaryExpr::create(BE->getOpcode(), LHS, RHS, Context); + } + } + + llvm_unreachable("Invalid expression kind!"); +} + +const MCExpr *VEAsmParser::fixupVariantKind(const MCExpr *E) { + MCContext &Context = getParser().getContext(); + + switch (E->getKind()) { + case MCExpr::Target: + case MCExpr::Constant: + case MCExpr::SymbolRef: + return E; + + case MCExpr::Unary: { + const MCUnaryExpr *UE = cast<MCUnaryExpr>(E); + const MCExpr *Sub = fixupVariantKind(UE->getSubExpr()); + if (Sub == UE->getSubExpr()) + return E; + return 
MCUnaryExpr::create(UE->getOpcode(), Sub, Context); + } + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast<MCBinaryExpr>(E); + const MCExpr *LHS = fixupVariantKind(BE->getLHS()); + const MCExpr *RHS = fixupVariantKind(BE->getRHS()); + if (LHS == BE->getLHS() && RHS == BE->getRHS()) + return E; + return MCBinaryExpr::create(BE->getOpcode(), LHS, RHS, Context); + } + } + + llvm_unreachable("Invalid expression kind!"); +} + +/// ParseExpression. This differs from the default "parseExpression" in that +/// it handles modifiers. +bool VEAsmParser::parseExpression(const MCExpr *&EVal) { + // Handle \code symbol @lo32/@hi32/etc \endcode. + if (getParser().parseExpression(EVal)) + return true; + + // Convert MCSymbolRefExpr with VK_* to MCExpr with VK_*. + EVal = fixupVariantKind(EVal); + VEMCExpr::VariantKind Variant; + const MCExpr *E = extractModifierFromExpr(EVal, Variant); + if (E) + EVal = VEMCExpr::create(Variant, E, getParser().getContext()); + + return false; +} + +OperandMatchResultTy VEAsmParser::parseMEMOperand(OperandVector &Operands) { + LLVM_DEBUG(dbgs() << "parseMEMOperand\n"); + const AsmToken &Tok = Parser.getTok(); + SMLoc S = Tok.getLoc(); + SMLoc E = Tok.getEndLoc(); + // Parse ASX format + // disp + // disp(, base) + // disp(index) + // disp(index, base) + // (, base) + // (index) + // (index, base) + + std::unique_ptr<VEOperand> Offset; + switch (getLexer().getKind()) { + default: + return MatchOperand_NoMatch; + + case AsmToken::Minus: + case AsmToken::Integer: + case AsmToken::Dot: + case AsmToken::Identifier: { + const MCExpr *EVal; + if (!parseExpression(EVal)) + Offset = VEOperand::CreateImm(EVal, S, E); + else + return MatchOperand_NoMatch; + break; + } + + case AsmToken::LParen: + // empty disp (= 0) + Offset = + VEOperand::CreateImm(MCConstantExpr::create(0, getContext()), S, E); + break; + } + + switch (getLexer().getKind()) { + default: + return MatchOperand_ParseFail; + + case AsmToken::EndOfStatement: + Operands.push_back(VEOperand::MorphToMEMzii( + MCConstantExpr::create(0, getContext()), std::move(Offset))); + return MatchOperand_Success; + + case AsmToken::LParen: + Parser.Lex(); // Eat the ( + break; + } + + const MCExpr *IndexValue = nullptr; + unsigned IndexReg = 0; + + switch (getLexer().getKind()) { + default: + if (ParseRegister(IndexReg, S, E)) + return MatchOperand_ParseFail; + break; + + case AsmToken::Minus: + case AsmToken::Integer: + case AsmToken::Dot: + if (getParser().parseExpression(IndexValue, E)) + return MatchOperand_ParseFail; + break; + + case AsmToken::Comma: + // empty index + IndexValue = MCConstantExpr::create(0, getContext()); + break; + } + + switch (getLexer().getKind()) { + default: + return MatchOperand_ParseFail; + + case AsmToken::RParen: + Parser.Lex(); // Eat the ) + Operands.push_back( + IndexValue ? VEOperand::MorphToMEMzii(IndexValue, std::move(Offset)) + : VEOperand::MorphToMEMzri(IndexReg, std::move(Offset))); + return MatchOperand_Success; + + case AsmToken::Comma: + Parser.Lex(); // Eat the , + break; + } + + unsigned BaseReg = 0; + if (ParseRegister(BaseReg, S, E)) + return MatchOperand_ParseFail; + + if (!Parser.getTok().is(AsmToken::RParen)) + return MatchOperand_ParseFail; + + Parser.Lex(); // Eat the ) + Operands.push_back( + IndexValue + ? 
VEOperand::MorphToMEMrii(BaseReg, IndexValue, std::move(Offset)) + : VEOperand::MorphToMEMrri(BaseReg, IndexReg, std::move(Offset))); + + return MatchOperand_Success; +} + +OperandMatchResultTy VEAsmParser::parseMEMAsOperand(OperandVector &Operands) { + LLVM_DEBUG(dbgs() << "parseMEMAsOperand\n"); + const AsmToken &Tok = Parser.getTok(); + SMLoc S = Tok.getLoc(); + SMLoc E = Tok.getEndLoc(); + // Parse AS format + // disp + // disp(, base) + // disp(base) + // disp() + // (, base) + // (base) + // base + + unsigned BaseReg = VE::NoRegister; + std::unique_ptr<VEOperand> Offset; + switch (getLexer().getKind()) { + default: + return MatchOperand_NoMatch; + + case AsmToken::Minus: + case AsmToken::Integer: + case AsmToken::Dot: + case AsmToken::Identifier: { + const MCExpr *EVal; + if (!parseExpression(EVal)) + Offset = VEOperand::CreateImm(EVal, S, E); + else + return MatchOperand_NoMatch; + break; + } + + case AsmToken::Percent: + if (ParseRegister(BaseReg, S, E)) + return MatchOperand_NoMatch; + Offset = + VEOperand::CreateImm(MCConstantExpr::create(0, getContext()), S, E); + break; + + case AsmToken::LParen: + // empty disp (= 0) + Offset = + VEOperand::CreateImm(MCConstantExpr::create(0, getContext()), S, E); + break; + } + + switch (getLexer().getKind()) { + default: + return MatchOperand_ParseFail; + + case AsmToken::EndOfStatement: + case AsmToken::Comma: + Operands.push_back(BaseReg != VE::NoRegister + ? VEOperand::MorphToMEMri(BaseReg, std::move(Offset)) + : VEOperand::MorphToMEMzi(std::move(Offset))); + return MatchOperand_Success; + + case AsmToken::LParen: + if (BaseReg != VE::NoRegister) + return MatchOperand_ParseFail; + Parser.Lex(); // Eat the ( + break; + } + + switch (getLexer().getKind()) { + default: + if (ParseRegister(BaseReg, S, E)) + return MatchOperand_ParseFail; + break; + + case AsmToken::Comma: + Parser.Lex(); // Eat the , + if (ParseRegister(BaseReg, S, E)) + return MatchOperand_ParseFail; + break; + + case AsmToken::RParen: + break; + } + + if (!Parser.getTok().is(AsmToken::RParen)) + return MatchOperand_ParseFail; + + Parser.Lex(); // Eat the ) + Operands.push_back(BaseReg != VE::NoRegister + ? VEOperand::MorphToMEMri(BaseReg, std::move(Offset)) + : VEOperand::MorphToMEMzi(std::move(Offset))); + + return MatchOperand_Success; +} + +OperandMatchResultTy VEAsmParser::parseMImmOperand(OperandVector &Operands) { + LLVM_DEBUG(dbgs() << "parseMImmOperand\n"); + + // Parsing "(" + number + ")0/1" + const AsmToken Tok1 = Parser.getTok(); + if (!Tok1.is(AsmToken::LParen)) + return MatchOperand_NoMatch; + + Parser.Lex(); // Eat the '('. + + const AsmToken Tok2 = Parser.getTok(); + SMLoc E; + const MCExpr *EVal; + if (!Tok2.is(AsmToken::Integer) || getParser().parseExpression(EVal, E)) { + getLexer().UnLex(Tok1); + return MatchOperand_NoMatch; + } + + const AsmToken Tok3 = Parser.getTok(); + if (!Tok3.is(AsmToken::RParen)) { + getLexer().UnLex(Tok2); + getLexer().UnLex(Tok1); + return MatchOperand_NoMatch; + } + Parser.Lex(); // Eat the ')'. + + const AsmToken &Tok4 = Parser.getTok(); + StringRef Suffix = Tok4.getString(); + if (Suffix != "1" && Suffix != "0") { + getLexer().UnLex(Tok3); + getLexer().UnLex(Tok2); + getLexer().UnLex(Tok1); + return MatchOperand_NoMatch; + } + Parser.Lex(); // Eat the value. 
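+  // "(", the integer, ")", and the "0"/"1" suffix are all consumed now. The
+  // suffix becomes the M0Flag below; addMImmOperands() encodes an "(m)0"
+  // operand (illustratively, "(5)0") as m + 64.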
+  SMLoc EndLoc = SMLoc::getFromPointer(Suffix.end());
+  Operands.push_back(
+      VEOperand::CreateMImm(EVal, Suffix == "0", Tok1.getLoc(), EndLoc));
+  return MatchOperand_Success;
+}
+
+OperandMatchResultTy VEAsmParser::parseOperand(OperandVector &Operands,
+                                               StringRef Mnemonic) {
+  LLVM_DEBUG(dbgs() << "parseOperand\n");
+  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+
+  // If there wasn't a custom match, try the generic matcher below. Otherwise,
+  // there was a match, but an error occurred, in which case, just return that
+  // the operand parsing failed.
+  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
+    return ResTy;
+
+  switch (getLexer().getKind()) {
+  case AsmToken::LParen:
+    // FIXME: Parsing "(" + %vreg + ", " + %vreg + ")"
+    // FALLTHROUGH
+  default: {
+    std::unique_ptr<VEOperand> Op;
+    ResTy = parseVEAsmOperand(Op);
+    if (ResTy != MatchOperand_Success || !Op)
+      return MatchOperand_ParseFail;
+
+    // Push the parsed operand into the list of operands.
+    Operands.push_back(std::move(Op));
+
+    if (!Parser.getTok().is(AsmToken::LParen))
+      break;
+
+    // FIXME: Parsing %vec-reg + "(" + %scalar-reg/number + ")"
+    break;
+  }
+  }
+
+  return MatchOperand_Success;
+}
+
+OperandMatchResultTy
+VEAsmParser::parseVEAsmOperand(std::unique_ptr<VEOperand> &Op) {
+  LLVM_DEBUG(dbgs() << "parseVEAsmOperand\n");
+  SMLoc S = Parser.getTok().getLoc();
+  SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+  const MCExpr *EVal;
+
+  Op = nullptr;
+  switch (getLexer().getKind()) {
+  default:
+    break;
+
+  case AsmToken::Percent:
+    unsigned RegNo;
+    if (tryParseRegister(RegNo, S, E) == MatchOperand_Success)
+      Op = VEOperand::CreateReg(RegNo, S, E);
+    break;
+
+  case AsmToken::Minus:
+  case AsmToken::Integer:
+  case AsmToken::Dot:
+  case AsmToken::Identifier:
+    if (!parseExpression(EVal))
+      Op = VEOperand::CreateImm(EVal, S, E);
+    break;
+  }
+  return (Op) ? MatchOperand_Success : MatchOperand_ParseFail;
+}
+
+// Force static initialization.
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVEAsmParser() {
+  RegisterMCAsmParser<VEAsmParser> A(getTheVETarget());
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "VEGenAsmMatcher.inc"
+
+unsigned VEAsmParser::validateTargetOperandClass(MCParsedAsmOperand &GOp,
+                                                 unsigned Kind) {
+  VEOperand &Op = (VEOperand &)GOp;
+
+  // VE uses identical register names for several register classes; for
+  // example, both F32 and I32 spell register 23 as "%s23". Convert the
+  // parsed register to the class the matcher expects before validation.
+  switch (Kind) {
+  default:
+    break;
+  case MCK_F32:
+    if (Op.isReg() && VEOperand::MorphToF32Reg(Op))
+      return MCTargetAsmParser::Match_Success;
+    break;
+  case MCK_I32:
+    if (Op.isReg() && VEOperand::MorphToI32Reg(Op))
+      return MCTargetAsmParser::Match_Success;
+    break;
+  case MCK_F128:
+    if (Op.isReg() && VEOperand::MorphToF128Reg(Op))
+      return MCTargetAsmParser::Match_Success;
+    break;
+  case MCK_MISC:
+    if (Op.isImm() && VEOperand::MorphToMISCReg(Op))
+      return MCTargetAsmParser::Match_Success;
+    break;
+  }
+  return Match_InvalidOperand;
+}
diff --git a/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp b/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
new file mode 100644
index 0000000000000..35885a4e3cae5
--- /dev/null
+++ b/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
@@ -0,0 +1,560 @@
+//===- VEDisassembler.cpp - Disassembler for VE -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is part of the VE Disassembler. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/VEMCTargetDesc.h" +#include "TargetInfo/VETargetInfo.h" +#include "VE.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +#define DEBUG_TYPE "ve-disassembler" + +typedef MCDisassembler::DecodeStatus DecodeStatus; + +namespace { + +/// A disassembler class for VE. +class VEDisassembler : public MCDisassembler { +public: + VEDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) + : MCDisassembler(STI, Ctx) {} + virtual ~VEDisassembler() {} + + DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &CStream) const override; +}; +} // namespace + +static MCDisassembler *createVEDisassembler(const Target &T, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new VEDisassembler(STI, Ctx); +} + +extern "C" void LLVMInitializeVEDisassembler() { + // Register the disassembler. + TargetRegistry::RegisterMCDisassembler(getTheVETarget(), + createVEDisassembler); +} + +static const unsigned I32RegDecoderTable[] = { + VE::SW0, VE::SW1, VE::SW2, VE::SW3, VE::SW4, VE::SW5, VE::SW6, + VE::SW7, VE::SW8, VE::SW9, VE::SW10, VE::SW11, VE::SW12, VE::SW13, + VE::SW14, VE::SW15, VE::SW16, VE::SW17, VE::SW18, VE::SW19, VE::SW20, + VE::SW21, VE::SW22, VE::SW23, VE::SW24, VE::SW25, VE::SW26, VE::SW27, + VE::SW28, VE::SW29, VE::SW30, VE::SW31, VE::SW32, VE::SW33, VE::SW34, + VE::SW35, VE::SW36, VE::SW37, VE::SW38, VE::SW39, VE::SW40, VE::SW41, + VE::SW42, VE::SW43, VE::SW44, VE::SW45, VE::SW46, VE::SW47, VE::SW48, + VE::SW49, VE::SW50, VE::SW51, VE::SW52, VE::SW53, VE::SW54, VE::SW55, + VE::SW56, VE::SW57, VE::SW58, VE::SW59, VE::SW60, VE::SW61, VE::SW62, + VE::SW63}; + +static const unsigned I64RegDecoderTable[] = { + VE::SX0, VE::SX1, VE::SX2, VE::SX3, VE::SX4, VE::SX5, VE::SX6, + VE::SX7, VE::SX8, VE::SX9, VE::SX10, VE::SX11, VE::SX12, VE::SX13, + VE::SX14, VE::SX15, VE::SX16, VE::SX17, VE::SX18, VE::SX19, VE::SX20, + VE::SX21, VE::SX22, VE::SX23, VE::SX24, VE::SX25, VE::SX26, VE::SX27, + VE::SX28, VE::SX29, VE::SX30, VE::SX31, VE::SX32, VE::SX33, VE::SX34, + VE::SX35, VE::SX36, VE::SX37, VE::SX38, VE::SX39, VE::SX40, VE::SX41, + VE::SX42, VE::SX43, VE::SX44, VE::SX45, VE::SX46, VE::SX47, VE::SX48, + VE::SX49, VE::SX50, VE::SX51, VE::SX52, VE::SX53, VE::SX54, VE::SX55, + VE::SX56, VE::SX57, VE::SX58, VE::SX59, VE::SX60, VE::SX61, VE::SX62, + VE::SX63}; + +static const unsigned F32RegDecoderTable[] = { + VE::SF0, VE::SF1, VE::SF2, VE::SF3, VE::SF4, VE::SF5, VE::SF6, + VE::SF7, VE::SF8, VE::SF9, VE::SF10, VE::SF11, VE::SF12, VE::SF13, + VE::SF14, VE::SF15, VE::SF16, VE::SF17, VE::SF18, VE::SF19, VE::SF20, + VE::SF21, VE::SF22, VE::SF23, VE::SF24, VE::SF25, VE::SF26, VE::SF27, + VE::SF28, VE::SF29, VE::SF30, VE::SF31, VE::SF32, VE::SF33, VE::SF34, + VE::SF35, VE::SF36, VE::SF37, VE::SF38, VE::SF39, VE::SF40, VE::SF41, + VE::SF42, VE::SF43, VE::SF44, VE::SF45, VE::SF46, VE::SF47, VE::SF48, + VE::SF49, VE::SF50, VE::SF51, VE::SF52, VE::SF53, VE::SF54, VE::SF55, + VE::SF56, 
VE::SF57, VE::SF58, VE::SF59, VE::SF60, VE::SF61, VE::SF62, + VE::SF63}; + +static const unsigned F128RegDecoderTable[] = { + VE::Q0, VE::Q1, VE::Q2, VE::Q3, VE::Q4, VE::Q5, VE::Q6, VE::Q7, + VE::Q8, VE::Q9, VE::Q10, VE::Q11, VE::Q12, VE::Q13, VE::Q14, VE::Q15, + VE::Q16, VE::Q17, VE::Q18, VE::Q19, VE::Q20, VE::Q21, VE::Q22, VE::Q23, + VE::Q24, VE::Q25, VE::Q26, VE::Q27, VE::Q28, VE::Q29, VE::Q30, VE::Q31}; + +static const unsigned MiscRegDecoderTable[] = { + VE::USRCC, VE::PSW, VE::SAR, VE::NoRegister, + VE::NoRegister, VE::NoRegister, VE::NoRegister, VE::PMMR, + VE::PMCR0, VE::PMCR1, VE::PMCR2, VE::PMCR3, + VE::NoRegister, VE::NoRegister, VE::NoRegister, VE::NoRegister, + VE::PMC0, VE::PMC1, VE::PMC2, VE::PMC3, + VE::PMC4, VE::PMC5, VE::PMC6, VE::PMC7, + VE::PMC8, VE::PMC9, VE::PMC10, VE::PMC11, + VE::PMC12, VE::PMC13, VE::PMC14}; + +static DecodeStatus DecodeI32RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 63) + return MCDisassembler::Fail; + unsigned Reg = I32RegDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeI64RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 63) + return MCDisassembler::Fail; + unsigned Reg = I64RegDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeF32RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 63) + return MCDisassembler::Fail; + unsigned Reg = F32RegDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeF128RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo % 2 || RegNo > 63) + return MCDisassembler::Fail; + unsigned Reg = F128RegDecoderTable[RegNo / 2]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeMISCRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 30) + return MCDisassembler::Fail; + unsigned Reg = MiscRegDecoderTable[RegNo]; + if (Reg == VE::NoRegister) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeASX(MCInst &Inst, uint64_t insn, uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeLoadI32(MCInst &Inst, uint64_t insn, uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeStoreI32(MCInst &Inst, uint64_t insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeLoadI64(MCInst &Inst, uint64_t insn, uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeStoreI64(MCInst &Inst, uint64_t insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeLoadF32(MCInst &Inst, uint64_t insn, uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeStoreF32(MCInst &Inst, uint64_t insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeLoadASI64(MCInst &Inst, uint64_t insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeStoreASI64(MCInst &Inst, uint64_t insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeTS1AMI64(MCInst &Inst, uint64_t insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeTS1AMI32(MCInst &Inst, uint64_t insn, + uint64_t 
Address, const void *Decoder);
+static DecodeStatus DecodeCASI64(MCInst &Inst, uint64_t insn, uint64_t Address,
+                                 const void *Decoder);
+static DecodeStatus DecodeCASI32(MCInst &Inst, uint64_t insn, uint64_t Address,
+                                 const void *Decoder);
+static DecodeStatus DecodeCall(MCInst &Inst, uint64_t insn, uint64_t Address,
+                               const void *Decoder);
+static DecodeStatus DecodeSIMM7(MCInst &Inst, uint64_t insn, uint64_t Address,
+                                const void *Decoder);
+static DecodeStatus DecodeSIMM32(MCInst &Inst, uint64_t insn, uint64_t Address,
+                                 const void *Decoder);
+static DecodeStatus DecodeCCOperand(MCInst &Inst, uint64_t insn,
+                                    uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeRDOperand(MCInst &Inst, uint64_t insn,
+                                    uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeBranchCondition(MCInst &Inst, uint64_t insn,
+                                          uint64_t Address,
+                                          const void *Decoder);
+static DecodeStatus DecodeBranchConditionAlways(MCInst &Inst, uint64_t insn,
+                                                uint64_t Address,
+                                                const void *Decoder);
+
+#include "VEGenDisassemblerTables.inc"
+
+/// Read eight bytes from the ArrayRef and return a 64-bit word.
+static DecodeStatus readInstruction64(ArrayRef<uint8_t> Bytes, uint64_t Address,
+                                      uint64_t &Size, uint64_t &Insn,
+                                      bool IsLittleEndian) {
+  // We want to read exactly 8 bytes of data.
+  if (Bytes.size() < 8) {
+    Size = 0;
+    return MCDisassembler::Fail;
+  }
+
+  Insn = IsLittleEndian
+             ? ((uint64_t)Bytes[0] << 0) | ((uint64_t)Bytes[1] << 8) |
+                   ((uint64_t)Bytes[2] << 16) | ((uint64_t)Bytes[3] << 24) |
+                   ((uint64_t)Bytes[4] << 32) | ((uint64_t)Bytes[5] << 40) |
+                   ((uint64_t)Bytes[6] << 48) | ((uint64_t)Bytes[7] << 56)
+             : ((uint64_t)Bytes[7] << 0) | ((uint64_t)Bytes[6] << 8) |
+                   ((uint64_t)Bytes[5] << 16) | ((uint64_t)Bytes[4] << 24) |
+                   ((uint64_t)Bytes[3] << 32) | ((uint64_t)Bytes[2] << 40) |
+                   ((uint64_t)Bytes[1] << 48) | ((uint64_t)Bytes[0] << 56);
+
+  return MCDisassembler::Success;
+}
+
+DecodeStatus VEDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
+                                            ArrayRef<uint8_t> Bytes,
+                                            uint64_t Address,
+                                            raw_ostream &CStream) const {
+  uint64_t Insn;
+  bool isLittleEndian = getContext().getAsmInfo()->isLittleEndian();
+  DecodeStatus Result =
+      readInstruction64(Bytes, Address, Size, Insn, isLittleEndian);
+  if (Result == MCDisassembler::Fail)
+    return MCDisassembler::Fail;
+
+  // Call the auto-generated decoder function.
+  Result = decodeInstruction(DecoderTableVE64, Instr, Insn, Address, this, STI);
+
+  if (Result != MCDisassembler::Fail) {
+    Size = 8;
+    return Result;
+  }
+
+  return MCDisassembler::Fail;
+}
+
+typedef DecodeStatus (*DecodeFunc)(MCInst &MI, unsigned RegNo, uint64_t Address,
+                                   const void *Decoder);
+
+static DecodeStatus DecodeASX(MCInst &MI, uint64_t insn, uint64_t Address,
+                              const void *Decoder) {
+  unsigned sy = fieldFromInstruction(insn, 40, 7);
+  bool cy = fieldFromInstruction(insn, 47, 1);
+  unsigned sz = fieldFromInstruction(insn, 32, 7);
+  bool cz = fieldFromInstruction(insn, 39, 1);
+  uint64_t simm32 = SignExtend64<32>(fieldFromInstruction(insn, 0, 32));
+  DecodeStatus status;
+
+  // Decode sz.
+  if (cz) {
+    status = DecodeI64RegisterClass(MI, sz, Address, Decoder);
+    if (status != MCDisassembler::Success)
+      return status;
+  } else {
+    MI.addOperand(MCOperand::createImm(0));
+  }
+
+  // Decode sy.
+  if (cy) {
+    status = DecodeI64RegisterClass(MI, sy, Address, Decoder);
+    if (status != MCDisassembler::Success)
+      return status;
+  } else {
+    MI.addOperand(MCOperand::createImm(SignExtend32<7>(sy)));
+  }
+
+  // Decode simm32.
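+  // (The resulting operand order, base then index then the 32-bit
+  // displacement, mirrors the MEMrri operand layout produced by
+  // addMEMrriOperands() on the assembler side.)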
+ MI.addOperand(MCOperand::createImm(simm32)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeAS(MCInst &MI, uint64_t insn, uint64_t Address, + const void *Decoder) { + unsigned sz = fieldFromInstruction(insn, 32, 7); + bool cz = fieldFromInstruction(insn, 39, 1); + uint64_t simm32 = SignExtend64<32>(fieldFromInstruction(insn, 0, 32)); + DecodeStatus status; + + // Decode sz. + if (cz) { + status = DecodeI64RegisterClass(MI, sz, Address, Decoder); + if (status != MCDisassembler::Success) + return status; + } else { + MI.addOperand(MCOperand::createImm(0)); + } + + // Decode simm32. + MI.addOperand(MCOperand::createImm(simm32)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeMem(MCInst &MI, uint64_t insn, uint64_t Address, + const void *Decoder, bool isLoad, + DecodeFunc DecodeSX) { + unsigned sx = fieldFromInstruction(insn, 48, 7); + + DecodeStatus status; + if (isLoad) { + status = DecodeSX(MI, sx, Address, Decoder); + if (status != MCDisassembler::Success) + return status; + } + + status = DecodeASX(MI, insn, Address, Decoder); + if (status != MCDisassembler::Success) + return status; + + if (!isLoad) { + status = DecodeSX(MI, sx, Address, Decoder); + if (status != MCDisassembler::Success) + return status; + } + return MCDisassembler::Success; +} + +static DecodeStatus DecodeMemAS(MCInst &MI, uint64_t insn, uint64_t Address, + const void *Decoder, bool isLoad, + DecodeFunc DecodeSX) { + unsigned sx = fieldFromInstruction(insn, 48, 7); + + DecodeStatus status; + if (isLoad) { + status = DecodeSX(MI, sx, Address, Decoder); + if (status != MCDisassembler::Success) + return status; + } + + status = DecodeAS(MI, insn, Address, Decoder); + if (status != MCDisassembler::Success) + return status; + + if (!isLoad) { + status = DecodeSX(MI, sx, Address, Decoder); + if (status != MCDisassembler::Success) + return status; + } + return MCDisassembler::Success; +} + +static DecodeStatus DecodeLoadI32(MCInst &Inst, uint64_t insn, uint64_t Address, + const void *Decoder) { + return DecodeMem(Inst, insn, Address, Decoder, true, DecodeI32RegisterClass); +} + +static DecodeStatus DecodeStoreI32(MCInst &Inst, uint64_t insn, + uint64_t Address, const void *Decoder) { + return DecodeMem(Inst, insn, Address, Decoder, false, DecodeI32RegisterClass); +} + +static DecodeStatus DecodeLoadI64(MCInst &Inst, uint64_t insn, uint64_t Address, + const void *Decoder) { + return DecodeMem(Inst, insn, Address, Decoder, true, DecodeI64RegisterClass); +} + +static DecodeStatus DecodeStoreI64(MCInst &Inst, uint64_t insn, + uint64_t Address, const void *Decoder) { + return DecodeMem(Inst, insn, Address, Decoder, false, DecodeI64RegisterClass); +} + +static DecodeStatus DecodeLoadF32(MCInst &Inst, uint64_t insn, uint64_t Address, + const void *Decoder) { + return DecodeMem(Inst, insn, Address, Decoder, true, DecodeF32RegisterClass); +} + +static DecodeStatus DecodeStoreF32(MCInst &Inst, uint64_t insn, + uint64_t Address, const void *Decoder) { + return DecodeMem(Inst, insn, Address, Decoder, false, DecodeF32RegisterClass); +} + +static DecodeStatus DecodeLoadASI64(MCInst &Inst, uint64_t insn, + uint64_t Address, const void *Decoder) { + return DecodeMemAS(Inst, insn, Address, Decoder, true, + DecodeI64RegisterClass); +} + +static DecodeStatus DecodeStoreASI64(MCInst &Inst, uint64_t insn, + uint64_t Address, const void *Decoder) { + return DecodeMemAS(Inst, insn, Address, Decoder, false, + DecodeI64RegisterClass); +} + +static DecodeStatus DecodeCAS(MCInst &MI, uint64_t insn, uint64_t 
Address, + const void *Decoder, bool isImmOnly, bool isUImm, + DecodeFunc DecodeSX) { + unsigned sx = fieldFromInstruction(insn, 48, 7); + bool cy = fieldFromInstruction(insn, 47, 1); + unsigned sy = fieldFromInstruction(insn, 40, 7); + + // Add $sx. + DecodeStatus status; + status = DecodeSX(MI, sx, Address, Decoder); + if (status != MCDisassembler::Success) + return status; + + // Add $disp($sz). + status = DecodeAS(MI, insn, Address, Decoder); + if (status != MCDisassembler::Success) + return status; + + // Add $sy. + if (cy && !isImmOnly) { + status = DecodeSX(MI, sy, Address, Decoder); + if (status != MCDisassembler::Success) + return status; + } else { + if (isUImm) + MI.addOperand(MCOperand::createImm(sy)); + else + MI.addOperand(MCOperand::createImm(SignExtend32<7>(sy))); + } + + // Add $sd. + status = DecodeSX(MI, sx, Address, Decoder); + if (status != MCDisassembler::Success) + return status; + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeTS1AMI64(MCInst &MI, uint64_t insn, uint64_t Address, + const void *Decoder) { + return DecodeCAS(MI, insn, Address, Decoder, false, true, + DecodeI64RegisterClass); +} + +static DecodeStatus DecodeTS1AMI32(MCInst &MI, uint64_t insn, uint64_t Address, + const void *Decoder) { + return DecodeCAS(MI, insn, Address, Decoder, false, true, + DecodeI32RegisterClass); +} + +static DecodeStatus DecodeCASI64(MCInst &MI, uint64_t insn, uint64_t Address, + const void *Decoder) { + return DecodeCAS(MI, insn, Address, Decoder, false, false, + DecodeI64RegisterClass); +} + +static DecodeStatus DecodeCASI32(MCInst &MI, uint64_t insn, uint64_t Address, + const void *Decoder) { + return DecodeCAS(MI, insn, Address, Decoder, false, false, + DecodeI32RegisterClass); +} + +static DecodeStatus DecodeCall(MCInst &Inst, uint64_t insn, uint64_t Address, + const void *Decoder) { + return DecodeMem(Inst, insn, Address, Decoder, true, DecodeI64RegisterClass); +} + +static DecodeStatus DecodeSIMM7(MCInst &MI, uint64_t insn, uint64_t Address, + const void *Decoder) { + uint64_t tgt = SignExtend64<7>(insn); + MI.addOperand(MCOperand::createImm(tgt)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeSIMM32(MCInst &MI, uint64_t insn, uint64_t Address, + const void *Decoder) { + uint64_t tgt = SignExtend64<32>(insn); + MI.addOperand(MCOperand::createImm(tgt)); + return MCDisassembler::Success; +} + +static bool isIntegerBCKind(MCInst &MI) { + +#define BCm_kind(NAME) \ + case NAME##rri: \ + case NAME##rzi: \ + case NAME##iri: \ + case NAME##izi: \ + case NAME##rri_nt: \ + case NAME##rzi_nt: \ + case NAME##iri_nt: \ + case NAME##izi_nt: \ + case NAME##rri_t: \ + case NAME##rzi_t: \ + case NAME##iri_t: \ + case NAME##izi_t: + +#define BCRm_kind(NAME) \ + case NAME##rr: \ + case NAME##ir: \ + case NAME##rr_nt: \ + case NAME##ir_nt: \ + case NAME##rr_t: \ + case NAME##ir_t: + + { + using namespace llvm::VE; + switch (MI.getOpcode()) { + BCm_kind(BCFL) BCm_kind(BCFW) BCRm_kind(BRCFL) + BCRm_kind(BRCFW) return true; + } + } +#undef BCm_kind + + return false; +} + +// Decode CC Operand field. +static DecodeStatus DecodeCCOperand(MCInst &MI, uint64_t cf, uint64_t Address, + const void *Decoder) { + MI.addOperand(MCOperand::createImm(VEValToCondCode(cf, isIntegerBCKind(MI)))); + return MCDisassembler::Success; +} + +// Decode RD Operand field. 
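+// (RD is the rounding-mode suffix field handled by parseRD() on the
+// assembler side; VEValToRD converts the encoded field back to a
+// VERD::RoundingMode value.)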
+static DecodeStatus DecodeRDOperand(MCInst &MI, uint64_t cf, uint64_t Address, + const void *Decoder) { + MI.addOperand(MCOperand::createImm(VEValToRD(cf))); + return MCDisassembler::Success; +} + +// Decode branch condition instruction and CCOperand field in it. +static DecodeStatus DecodeBranchCondition(MCInst &MI, uint64_t insn, + uint64_t Address, + const void *Decoder) { + unsigned cf = fieldFromInstruction(insn, 48, 4); + bool cy = fieldFromInstruction(insn, 47, 1); + unsigned sy = fieldFromInstruction(insn, 40, 7); + + // Decode cf. + MI.addOperand(MCOperand::createImm(VEValToCondCode(cf, isIntegerBCKind(MI)))); + + // Decode sy. + DecodeStatus status; + if (cy) { + status = DecodeI64RegisterClass(MI, sy, Address, Decoder); + if (status != MCDisassembler::Success) + return status; + } else { + MI.addOperand(MCOperand::createImm(SignExtend32<7>(sy))); + } + + // Decode MEMri. + return DecodeAS(MI, insn, Address, Decoder); +} + +static DecodeStatus DecodeBranchConditionAlways(MCInst &MI, uint64_t insn, + uint64_t Address, + const void *Decoder) { + // Decode MEMri. + return DecodeAS(MI, insn, Address, Decoder); +} diff --git a/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp b/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp deleted file mode 100644 index 4e7bcd36c32a5..0000000000000 --- a/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp +++ /dev/null @@ -1,118 +0,0 @@ -//===-- VEInstPrinter.cpp - Convert VE MCInst to assembly syntax -----------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This class prints an VE MCInst to a .s file. -// -//===----------------------------------------------------------------------===// - -#include "VEInstPrinter.h" -#include "VE.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "ve-asmprinter" - -// The generated AsmMatcher VEGenAsmWriter uses "VE" as the target -// namespace. 
-namespace llvm { -namespace VE { -using namespace VE; -} -} // namespace llvm - -#define GET_INSTRUCTION_NAME -#define PRINT_ALIAS_INSTR -#include "VEGenAsmWriter.inc" - -void VEInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - OS << '%' << StringRef(getRegisterName(RegNo)).lower(); -} - -void VEInstPrinter::printInst(const MCInst *MI, uint64_t Address, - StringRef Annot, const MCSubtargetInfo &STI, - raw_ostream &OS) { - if (!printAliasInstr(MI, STI, OS)) - printInstruction(MI, Address, STI, OS); - printAnnotation(OS, Annot); -} - -void VEInstPrinter::printOperand(const MCInst *MI, int opNum, - const MCSubtargetInfo &STI, raw_ostream &O) { - const MCOperand &MO = MI->getOperand(opNum); - - if (MO.isReg()) { - printRegName(O, MO.getReg()); - return; - } - - if (MO.isImm()) { - switch (MI->getOpcode()) { - default: - // Expects signed 32bit literals - assert(isInt<32>(MO.getImm()) && "Immediate too large"); - int32_t TruncatedImm = static_cast<int32_t>(MO.getImm()); - O << TruncatedImm; - return; - } - } - - assert(MO.isExpr() && "Unknown operand kind in printOperand"); - MO.getExpr()->print(O, &MAI); -} - -void VEInstPrinter::printMemASXOperand(const MCInst *MI, int opNum, - const MCSubtargetInfo &STI, - raw_ostream &O, const char *Modifier) { - // If this is an ADD operand, emit it like normal operands. - if (Modifier && !strcmp(Modifier, "arith")) { - printOperand(MI, opNum, STI, O); - O << ", "; - printOperand(MI, opNum + 1, STI, O); - return; - } - - const MCOperand &MO = MI->getOperand(opNum + 1); - if (!MO.isImm() || MO.getImm() != 0) { - printOperand(MI, opNum + 1, STI, O); - } - O << "(,"; - printOperand(MI, opNum, STI, O); - O << ")"; -} - -void VEInstPrinter::printMemASOperand(const MCInst *MI, int opNum, - const MCSubtargetInfo &STI, - raw_ostream &O, const char *Modifier) { - // If this is an ADD operand, emit it like normal operands. - if (Modifier && !strcmp(Modifier, "arith")) { - printOperand(MI, opNum, STI, O); - O << ", "; - printOperand(MI, opNum + 1, STI, O); - return; - } - - const MCOperand &MO = MI->getOperand(opNum + 1); - if (!MO.isImm() || MO.getImm() != 0) { - printOperand(MI, opNum + 1, STI, O); - } - O << "("; - printOperand(MI, opNum, STI, O); - O << ")"; -} - -void VEInstPrinter::printCCOperand(const MCInst *MI, int opNum, - const MCSubtargetInfo &STI, raw_ostream &O) { - int CC = (int)MI->getOperand(opNum).getImm(); - O << VECondCodeToString((VECC::CondCodes)CC); -} diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp new file mode 100644 index 0000000000000..9a6ae90b5c73c --- /dev/null +++ b/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp @@ -0,0 +1,224 @@ +//===-- VEAsmBackend.cpp - VE Assembler Backend ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/VEFixupKinds.h"
+#include "MCTargetDesc/VEMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
+  switch (Kind) {
+  default:
+    llvm_unreachable("Unknown fixup kind!");
+  case FK_Data_1:
+  case FK_Data_2:
+  case FK_Data_4:
+  case FK_Data_8:
+  case FK_PCRel_1:
+  case FK_PCRel_2:
+  case FK_PCRel_4:
+  case FK_PCRel_8:
+    return Value;
+  case VE::fixup_ve_hi32:
+  case VE::fixup_ve_pc_hi32:
+  case VE::fixup_ve_got_hi32:
+  case VE::fixup_ve_gotoff_hi32:
+  case VE::fixup_ve_plt_hi32:
+  case VE::fixup_ve_tls_gd_hi32:
+  case VE::fixup_ve_tpoff_hi32:
+    return (Value >> 32) & 0xffffffff;
+  case VE::fixup_ve_reflong:
+  case VE::fixup_ve_lo32:
+  case VE::fixup_ve_pc_lo32:
+  case VE::fixup_ve_got_lo32:
+  case VE::fixup_ve_gotoff_lo32:
+  case VE::fixup_ve_plt_lo32:
+  case VE::fixup_ve_tls_gd_lo32:
+  case VE::fixup_ve_tpoff_lo32:
+    return Value & 0xffffffff;
+  }
+}
+
+/// getFixupKindNumBytes - The number of bytes the fixup may change.
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+  switch (Kind) {
+  default:
+    llvm_unreachable("Unknown fixup kind!");
+  case FK_Data_1:
+  case FK_PCRel_1:
+    return 1;
+  case FK_Data_2:
+  case FK_PCRel_2:
+    return 2;
+  case FK_Data_4:
+  case FK_PCRel_4:
+  case VE::fixup_ve_reflong:
+  case VE::fixup_ve_hi32:
+  case VE::fixup_ve_lo32:
+  case VE::fixup_ve_pc_hi32:
+  case VE::fixup_ve_pc_lo32:
+  case VE::fixup_ve_got_hi32:
+  case VE::fixup_ve_got_lo32:
+  case VE::fixup_ve_gotoff_hi32:
+  case VE::fixup_ve_gotoff_lo32:
+  case VE::fixup_ve_plt_hi32:
+  case VE::fixup_ve_plt_lo32:
+  case VE::fixup_ve_tls_gd_hi32:
+  case VE::fixup_ve_tls_gd_lo32:
+  case VE::fixup_ve_tpoff_hi32:
+  case VE::fixup_ve_tpoff_lo32:
+    return 4;
+  case FK_Data_8:
+  case FK_PCRel_8:
+    return 8;
+  }
+}
+
+namespace {
+class VEAsmBackend : public MCAsmBackend {
+protected:
+  const Target &TheTarget;
+
+public:
+  VEAsmBackend(const Target &T) : MCAsmBackend(support::little), TheTarget(T) {}
+
+  unsigned getNumFixupKinds() const override { return VE::NumTargetFixupKinds; }
+
+  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
+    const static MCFixupKindInfo Infos[VE::NumTargetFixupKinds] = {
+        // name, offset, bits, flags
+        {"fixup_ve_reflong", 0, 32, 0},
+        {"fixup_ve_hi32", 0, 32, 0},
+        {"fixup_ve_lo32", 0, 32, 0},
+        {"fixup_ve_pc_hi32", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+        {"fixup_ve_pc_lo32", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+        {"fixup_ve_got_hi32", 0, 32, 0},
+        {"fixup_ve_got_lo32", 0, 32, 0},
+        {"fixup_ve_gotoff_hi32", 0, 32, 0},
+        {"fixup_ve_gotoff_lo32", 0, 32, 0},
+        {"fixup_ve_plt_hi32", 0, 32, 0},
+        {"fixup_ve_plt_lo32", 0, 32, 0},
+        {"fixup_ve_tls_gd_hi32", 0, 32, 0},
+        {"fixup_ve_tls_gd_lo32", 0, 32, 0},
+        {"fixup_ve_tpoff_hi32", 0, 32, 0},
+        {"fixup_ve_tpoff_lo32", 0, 32, 0},
+    };
+
+    if (Kind < FirstTargetFixupKind)
+      return MCAsmBackend::getFixupKindInfo(Kind);
+
+    assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+           "Invalid kind!");
+    return Infos[Kind - FirstTargetFixupKind];
+  }
+
+  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+                             const MCValue &Target) override {
+    switch ((VE::Fixups)Fixup.getKind()) {
+    default:
+      return false;
+    case VE::fixup_ve_tls_gd_hi32:
+    case VE::fixup_ve_tls_gd_lo32:
+    case VE::fixup_ve_tpoff_hi32:
+    case VE::fixup_ve_tpoff_lo32:
+      return true;
+    }
+  }
+
+  bool mayNeedRelaxation(const MCInst &Inst,
+                         const MCSubtargetInfo &STI) const override {
+    // Not implemented yet. For example, if we have a branch with an
+    // immediate larger than SIMM32, we want to relax such branch
+    // instructions.
+    return false;
+  }
+
+  /// fixupNeedsRelaxation - Target specific predicate for whether a given
+  /// fixup requires the associated instruction to be relaxed.
+  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+                            const MCRelaxableFragment *DF,
+                            const MCAsmLayout &Layout) const override {
+    // Not implemented yet. For example, if we have a branch with an
+    // immediate larger than SIMM32, we want to relax such branch
+    // instructions.
+    return false;
+  }
+  void relaxInstruction(MCInst &Inst,
+                        const MCSubtargetInfo &STI) const override {
+    // Aurora VE doesn't support relaxInstruction yet.
+    llvm_unreachable("relaxInstruction() should not be called");
+  }
+
+  bool writeNopData(raw_ostream &OS, uint64_t Count) const override {
+    if ((Count % 8) != 0)
+      return false;
+
+    for (uint64_t i = 0; i < Count; i += 8)
+      support::endian::write<uint64_t>(OS, 0x7900000000000000ULL,
+                                       support::little);
+
+    return true;
+  }
+};
+
+class ELFVEAsmBackend : public VEAsmBackend {
+  Triple::OSType OSType;
+
+public:
+  ELFVEAsmBackend(const Target &T, Triple::OSType OSType)
+      : VEAsmBackend(T), OSType(OSType) {}
+
+  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+                  const MCValue &Target, MutableArrayRef<char> Data,
+                  uint64_t Value, bool IsResolved,
+                  const MCSubtargetInfo *STI) const override {
+    Value = adjustFixupValue(Fixup.getKind(), Value);
+    if (!Value)
+      return; // Doesn't change encoding.
+
+    MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
+
+    // Shift the value into position.
+    Value <<= Info.TargetOffset;
+
+    unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+    unsigned Offset = Fixup.getOffset();
+    assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
+    // For each byte of the fragment that the fixup touches, mask in the bits
+    // from the fixup value. The Value has been "split up" into the
+    // appropriate bitfields above.
+    for (unsigned i = 0; i != NumBytes; ++i) {
+      unsigned Idx = Endian == support::little ? 
i : (NumBytes - 1) - i; + Data[Offset + Idx] |= static_cast<uint8_t>((Value >> (i * 8)) & 0xff); + } + } + + std::unique_ptr<MCObjectTargetWriter> + createObjectTargetWriter() const override { + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(OSType); + return createVEELFObjectWriter(OSABI); + } +}; +} // end anonymous namespace + +MCAsmBackend *llvm::createVEAsmBackend(const Target &T, + const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, + const MCTargetOptions &Options) { + return new ELFVEAsmBackend(T, STI.getTargetTriple().getOS()); +} diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp new file mode 100644 index 0000000000000..741e8320a9411 --- /dev/null +++ b/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp @@ -0,0 +1,135 @@ +//===-- VEELFObjectWriter.cpp - VE ELF Writer -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "VEFixupKinds.h" +#include "VEMCExpr.h" +#include "VEMCTargetDesc.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +namespace { +class VEELFObjectWriter : public MCELFObjectTargetWriter { +public: + VEELFObjectWriter(uint8_t OSABI) + : MCELFObjectTargetWriter(/* Is64Bit */ true, OSABI, ELF::EM_VE, + /* HasRelocationAddend */ true) {} + + ~VEELFObjectWriter() override {} + +protected: + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsPCRel) const override; + + bool needsRelocateWithSymbol(const MCSymbol &Sym, + unsigned Type) const override; +}; +} // namespace + +unsigned VEELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { + if (const VEMCExpr *SExpr = dyn_cast<VEMCExpr>(Fixup.getValue())) { + if (SExpr->getKind() == VEMCExpr::VK_VE_PC_LO32) + return ELF::R_VE_PC_LO32; + } + + if (IsPCRel) { + switch (Fixup.getTargetKind()) { + default: + llvm_unreachable("Unimplemented fixup -> relocation"); + case FK_PCRel_1: + llvm_unreachable("Unimplemented fixup fk_data_1 -> relocation"); + case FK_PCRel_2: + llvm_unreachable("Unimplemented fixup fk_data_2 -> relocation"); + // FIXME: relative kind? 
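+      // (R_VE_REFLONG and R_VE_REFQUAD below are the same relocations the
+      // absolute FK_Data_4 / FK_Data_8 fixups map to, hence the FIXME.)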
+ case FK_PCRel_4: + return ELF::R_VE_REFLONG; + case FK_PCRel_8: + return ELF::R_VE_REFQUAD; + case VE::fixup_ve_pc_hi32: + return ELF::R_VE_PC_HI32; + case VE::fixup_ve_pc_lo32: + return ELF::R_VE_PC_LO32; + } + } + + switch (Fixup.getTargetKind()) { + default: + llvm_unreachable("Unimplemented fixup -> relocation"); + case FK_Data_1: + llvm_unreachable("Unimplemented fixup fk_data_1 -> relocation"); + case FK_Data_2: + llvm_unreachable("Unimplemented fixup fk_data_2 -> relocation"); + case FK_Data_4: + return ELF::R_VE_REFLONG; + case FK_Data_8: + return ELF::R_VE_REFQUAD; + case VE::fixup_ve_reflong: + return ELF::R_VE_REFLONG; + case VE::fixup_ve_hi32: + return ELF::R_VE_HI32; + case VE::fixup_ve_lo32: + return ELF::R_VE_LO32; + case VE::fixup_ve_pc_hi32: + llvm_unreachable("Unimplemented fixup pc_hi32 -> relocation"); + case VE::fixup_ve_pc_lo32: + llvm_unreachable("Unimplemented fixup pc_lo32 -> relocation"); + case VE::fixup_ve_got_hi32: + return ELF::R_VE_GOT_HI32; + case VE::fixup_ve_got_lo32: + return ELF::R_VE_GOT_LO32; + case VE::fixup_ve_gotoff_hi32: + return ELF::R_VE_GOTOFF_HI32; + case VE::fixup_ve_gotoff_lo32: + return ELF::R_VE_GOTOFF_LO32; + case VE::fixup_ve_plt_hi32: + return ELF::R_VE_PLT_HI32; + case VE::fixup_ve_plt_lo32: + return ELF::R_VE_PLT_LO32; + case VE::fixup_ve_tls_gd_hi32: + return ELF::R_VE_TLS_GD_HI32; + case VE::fixup_ve_tls_gd_lo32: + return ELF::R_VE_TLS_GD_LO32; + case VE::fixup_ve_tpoff_hi32: + return ELF::R_VE_TPOFF_HI32; + case VE::fixup_ve_tpoff_lo32: + return ELF::R_VE_TPOFF_LO32; + } + + return ELF::R_VE_NONE; +} + +bool VEELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, + unsigned Type) const { + switch (Type) { + default: + return false; + + // All relocations that use a GOT need a symbol, not an offset, as + // the offset of the symbol within the section is irrelevant to + // where the GOT entry is. Don't need to list all the TLS entries, + // as they're all marked as requiring a symbol anyways. + case ELF::R_VE_GOT_HI32: + case ELF::R_VE_GOT_LO32: + case ELF::R_VE_GOTOFF_HI32: + case ELF::R_VE_GOTOFF_LO32: + case ELF::R_VE_TLS_GD_HI32: + case ELF::R_VE_TLS_GD_LO32: + return true; + } +} + +std::unique_ptr<MCObjectTargetWriter> +llvm::createVEELFObjectWriter(uint8_t OSABI) { + return std::make_unique<VEELFObjectWriter>(OSABI); +} diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h b/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h new file mode 100644 index 0000000000000..5d5dc1c5c891a --- /dev/null +++ b/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h @@ -0,0 +1,61 @@ +//===-- VEFixupKinds.h - VE Specific Fixup Entries --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_VE_MCTARGETDESC_VEFIXUPKINDS_H
+#define LLVM_LIB_TARGET_VE_MCTARGETDESC_VEFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace VE {
+enum Fixups {
+  /// fixup_ve_reflong - 32-bit fixup corresponding to foo
+  fixup_ve_reflong = FirstTargetFixupKind,
+
+  /// fixup_ve_hi32 - 32-bit fixup corresponding to foo@hi
+  fixup_ve_hi32,
+
+  /// fixup_ve_lo32 - 32-bit fixup corresponding to foo@lo
+  fixup_ve_lo32,
+
+  /// fixup_ve_pc_hi32 - 32-bit fixup corresponding to foo@pc_hi
+  fixup_ve_pc_hi32,
+
+  /// fixup_ve_pc_lo32 - 32-bit fixup corresponding to foo@pc_lo
+  fixup_ve_pc_lo32,
+
+  /// fixup_ve_got_hi32 - 32-bit fixup corresponding to foo@got_hi
+  fixup_ve_got_hi32,
+
+  /// fixup_ve_got_lo32 - 32-bit fixup corresponding to foo@got_lo
+  fixup_ve_got_lo32,
+
+  /// fixup_ve_gotoff_hi32 - 32-bit fixup corresponding to foo@gotoff_hi
+  fixup_ve_gotoff_hi32,
+
+  /// fixup_ve_gotoff_lo32 - 32-bit fixup corresponding to foo@gotoff_lo
+  fixup_ve_gotoff_lo32,
+
+  /// fixup_ve_plt_hi32/lo32
+  fixup_ve_plt_hi32,
+  fixup_ve_plt_lo32,
+
+  /// fixups for Thread Local Storage
+  fixup_ve_tls_gd_hi32,
+  fixup_ve_tls_gd_lo32,
+  fixup_ve_tpoff_hi32,
+  fixup_ve_tpoff_lo32,
+
+  // Marker
+  LastTargetFixupKind,
+  NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+} // namespace VE
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp
new file mode 100644
index 0000000000000..1fe9423e01b80
--- /dev/null
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp
@@ -0,0 +1,227 @@
+//===-- VEInstPrinter.cpp - Convert VE MCInst to assembly syntax -----------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a VE MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "VEInstPrinter.h"
+#include "VE.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ve-asmprinter"
+
+// The generated AsmWriter VEGenAsmWriter uses "VE" as the target
+// namespace.
+namespace llvm {
+namespace VE {
+using namespace VE;
+}
+} // namespace llvm
+
+#define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "VEGenAsmWriter.inc"
+
+void VEInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+  // Generic registers have identical names across their register classes.
+  unsigned AltIdx = VE::AsmName;
+  // Misc registers each have their own name, so do not use alt names for them.
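+  // For instance, %s20 is printed for the I64, I32, and F32 views of
+  // register 20, while a misc register such as %usrcc keeps its dedicated
+  // name.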
+ if (MRI.getRegClass(VE::MISCRegClassID).contains(RegNo)) + AltIdx = VE::NoRegAltName; + OS << '%' << getRegisterName(RegNo, AltIdx); +} + +void VEInstPrinter::printInst(const MCInst *MI, uint64_t Address, + StringRef Annot, const MCSubtargetInfo &STI, + raw_ostream &OS) { + if (!printAliasInstr(MI, Address, STI, OS)) + printInstruction(MI, Address, STI, OS); + printAnnotation(OS, Annot); +} + +void VEInstPrinter::printOperand(const MCInst *MI, int OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + + if (MO.isReg()) { + printRegName(O, MO.getReg()); + return; + } + + if (MO.isImm()) { + switch (MI->getOpcode()) { + default: + // Expects signed 32bit literals + int32_t TruncatedImm = static_cast<int32_t>(MO.getImm()); + O << TruncatedImm; + return; + } + } + + assert(MO.isExpr() && "Unknown operand kind in printOperand"); + MO.getExpr()->print(O, &MAI); +} + +void VEInstPrinter::printMemASXOperand(const MCInst *MI, int OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O, const char *Modifier) { + // If this is an ADD operand, emit it like normal operands. + if (Modifier && !strcmp(Modifier, "arith")) { + printOperand(MI, OpNum, STI, O); + O << ", "; + printOperand(MI, OpNum + 1, STI, O); + return; + } + + if (MI->getOperand(OpNum + 2).isImm() && + MI->getOperand(OpNum + 2).getImm() == 0) { + // don't print "+0" + } else { + printOperand(MI, OpNum + 2, STI, O); + } + if (MI->getOperand(OpNum + 1).isImm() && + MI->getOperand(OpNum + 1).getImm() == 0 && + MI->getOperand(OpNum).isImm() && MI->getOperand(OpNum).getImm() == 0) { + if (MI->getOperand(OpNum + 2).isImm() && + MI->getOperand(OpNum + 2).getImm() == 0) { + O << "0"; + } else { + // don't print "+0,+0" + } + } else { + O << "("; + if (MI->getOperand(OpNum + 1).isImm() && + MI->getOperand(OpNum + 1).getImm() == 0) { + // don't print "+0" + } else { + printOperand(MI, OpNum + 1, STI, O); + } + if (MI->getOperand(OpNum).isImm() && MI->getOperand(OpNum).getImm() == 0) { + // don't print "+0" + } else { + O << ", "; + printOperand(MI, OpNum, STI, O); + } + O << ")"; + } +} + +void VEInstPrinter::printMemASOperandASX(const MCInst *MI, int OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O, const char *Modifier) { + // If this is an ADD operand, emit it like normal operands. + if (Modifier && !strcmp(Modifier, "arith")) { + printOperand(MI, OpNum, STI, O); + O << ", "; + printOperand(MI, OpNum + 1, STI, O); + return; + } + + if (MI->getOperand(OpNum + 1).isImm() && + MI->getOperand(OpNum + 1).getImm() == 0) { + // don't print "+0" + } else { + printOperand(MI, OpNum + 1, STI, O); + } + if (MI->getOperand(OpNum).isImm() && MI->getOperand(OpNum).getImm() == 0) { + if (MI->getOperand(OpNum + 1).isImm() && + MI->getOperand(OpNum + 1).getImm() == 0) { + O << "0"; + } else { + // don't print "(0)" + } + } else { + O << "(, "; + printOperand(MI, OpNum, STI, O); + O << ")"; + } +} + +void VEInstPrinter::printMemASOperandRRM(const MCInst *MI, int OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O, const char *Modifier) { + // If this is an ADD operand, emit it like normal operands. 
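+  // Otherwise the operand is printed in the AS form "disp(base)",
+  // e.g. "16(%s11)", omitting any component that is zero.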
+ if (Modifier && !strcmp(Modifier, "arith")) { + printOperand(MI, OpNum, STI, O); + O << ", "; + printOperand(MI, OpNum + 1, STI, O); + return; + } + + if (MI->getOperand(OpNum + 1).isImm() && + MI->getOperand(OpNum + 1).getImm() == 0) { + // don't print "+0" + } else { + printOperand(MI, OpNum + 1, STI, O); + } + if (MI->getOperand(OpNum).isImm() && MI->getOperand(OpNum).getImm() == 0) { + if (MI->getOperand(OpNum + 1).isImm() && + MI->getOperand(OpNum + 1).getImm() == 0) { + O << "0"; + } else { + // don't print "(0)" + } + } else { + O << "("; + printOperand(MI, OpNum, STI, O); + O << ")"; + } +} + +void VEInstPrinter::printMemASOperandHM(const MCInst *MI, int OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O, const char *Modifier) { + // If this is an ADD operand, emit it like normal operands. + if (Modifier && !strcmp(Modifier, "arith")) { + printOperand(MI, OpNum, STI, O); + O << ", "; + printOperand(MI, OpNum + 1, STI, O); + return; + } + + if (MI->getOperand(OpNum + 1).isImm() && + MI->getOperand(OpNum + 1).getImm() == 0) { + // don't print "+0" + } else { + printOperand(MI, OpNum + 1, STI, O); + } + O << "("; + if (MI->getOperand(OpNum).isReg()) + printOperand(MI, OpNum, STI, O); + O << ")"; +} + +void VEInstPrinter::printMImmOperand(const MCInst *MI, int OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + int MImm = (int)MI->getOperand(OpNum).getImm() & 0x7f; + if (MImm > 63) + O << "(" << MImm - 64 << ")0"; + else + O << "(" << MImm << ")1"; +} + +void VEInstPrinter::printCCOperand(const MCInst *MI, int OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { + int CC = (int)MI->getOperand(OpNum).getImm(); + O << VECondCodeToString((VECC::CondCode)CC); +} + +void VEInstPrinter::printRDOperand(const MCInst *MI, int OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { + int RD = (int)MI->getOperand(OpNum).getImm(); + O << VERDToString((VERD::RoundingMode)RD); +} diff --git a/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.h b/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h index 05a53d59e8782..657cc513b3c50 100644 --- a/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.h +++ b/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h @@ -13,6 +13,7 @@ #ifndef LLVM_LIB_TARGET_VE_INSTPRINTER_VEINSTPRINTER_H #define LLVM_LIB_TARGET_VE_INSTPRINTER_VEINSTPRINTER_H +#include "VEMCTargetDesc.h" #include "llvm/MC/MCInstPrinter.h" namespace llvm { @@ -28,20 +29,32 @@ public: const MCSubtargetInfo &STI, raw_ostream &OS) override; // Autogenerated by tblgen. 
- bool printAliasInstr(const MCInst *, const MCSubtargetInfo &, raw_ostream &); + bool printAliasInstr(const MCInst *, uint64_t Address, + const MCSubtargetInfo &, raw_ostream &); void printInstruction(const MCInst *, uint64_t, const MCSubtargetInfo &, raw_ostream &); - static const char *getRegisterName(unsigned RegNo); + static const char *getRegisterName(unsigned RegNo, + unsigned AltIdx = VE::NoRegAltName); - void printOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, + void printOperand(const MCInst *MI, int OpNum, const MCSubtargetInfo &STI, raw_ostream &OS); - void printMemASXOperand(const MCInst *MI, int opNum, + void printMemASXOperand(const MCInst *MI, int OpNum, const MCSubtargetInfo &STI, raw_ostream &OS, const char *Modifier = nullptr); - void printMemASOperand(const MCInst *MI, int opNum, - const MCSubtargetInfo &STI, raw_ostream &OS, - const char *Modifier = nullptr); - void printCCOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, + void printMemASOperandASX(const MCInst *MI, int OpNum, + const MCSubtargetInfo &STI, raw_ostream &OS, + const char *Modifier = nullptr); + void printMemASOperandRRM(const MCInst *MI, int OpNum, + const MCSubtargetInfo &STI, raw_ostream &OS, + const char *Modifier = nullptr); + void printMemASOperandHM(const MCInst *MI, int OpNum, + const MCSubtargetInfo &STI, raw_ostream &OS, + const char *Modifier = nullptr); + void printMImmOperand(const MCInst *MI, int OpNum, const MCSubtargetInfo &STI, + raw_ostream &OS); + void printCCOperand(const MCInst *MI, int OpNum, const MCSubtargetInfo &STI, + raw_ostream &OS); + void printRDOperand(const MCInst *MI, int OpNum, const MCSubtargetInfo &STI, raw_ostream &OS); }; } // namespace llvm diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp index 9f29fc092c697..76824335239b7 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp @@ -37,4 +37,5 @@ VEELFMCAsmInfo::VEELFMCAsmInfo(const Triple &TheTriple) { UsesELFSectionDirectiveForBSS = true; SupportsDebugInformation = true; + UseIntegratedAssembler = false; } diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp new file mode 100644 index 0000000000000..d50d8fcae9daa --- /dev/null +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp @@ -0,0 +1,165 @@ +//===-- VEMCCodeEmitter.cpp - Convert VE code to machine code -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the VEMCCodeEmitter class. 
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/VEFixupKinds.h" +#include "VE.h" +#include "VEMCExpr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstdint> + +using namespace llvm; + +#define DEBUG_TYPE "mccodeemitter" + +STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); + +namespace { + +class VEMCCodeEmitter : public MCCodeEmitter { + const MCInstrInfo &MCII; + MCContext &Ctx; + +public: + VEMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) + : MCII(mcii), Ctx(ctx) {} + VEMCCodeEmitter(const VEMCCodeEmitter &) = delete; + VEMCCodeEmitter &operator=(const VEMCCodeEmitter &) = delete; + ~VEMCCodeEmitter() override = default; + + void encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override; + + // getBinaryCodeForInstr - TableGen'erated function for getting the + // binary encoding for an instruction. + uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. + unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + uint64_t getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + uint64_t getCCOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + uint64_t getRDOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + +private: + FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const; + void + verifyInstructionPredicates(const MCInst &MI, + const FeatureBitset &AvailableFeatures) const; +}; + +} // end anonymous namespace + +void VEMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + verifyInstructionPredicates(MI, + computeAvailableFeatures(STI.getFeatureBits())); + + uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); + support::endian::write<uint64_t>(OS, Bits, support::little); + + ++MCNumEmitted; // Keep track of the # of mi's emitted. 
+} + +unsigned VEMCCodeEmitter::getMachineOpValue(const MCInst &MI, + const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + if (MO.isReg()) + return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); + + if (MO.isImm()) + return MO.getImm(); + + assert(MO.isExpr()); + const MCExpr *Expr = MO.getExpr(); + if (const VEMCExpr *SExpr = dyn_cast<VEMCExpr>(Expr)) { + MCFixupKind Kind = (MCFixupKind)SExpr->getFixupKind(); + Fixups.push_back(MCFixup::create(0, Expr, Kind)); + return 0; + } + + int64_t Res; + if (Expr->evaluateAsAbsolute(Res)) + return Res; + + llvm_unreachable("Unhandled expression!"); + return 0; +} + +uint64_t +VEMCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isReg() || MO.isImm()) + return getMachineOpValue(MI, MO, Fixups, STI); + + Fixups.push_back( + MCFixup::create(0, MO.getExpr(), (MCFixupKind)VE::fixup_ve_pc_lo32)); + return 0; +} + +uint64_t VEMCCodeEmitter::getCCOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isImm()) + return VECondCodeToVal( + static_cast<VECC::CondCode>(getMachineOpValue(MI, MO, Fixups, STI))); + return 0; +} + +uint64_t VEMCCodeEmitter::getRDOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isImm()) + return VERDToVal(static_cast<VERD::RoundingMode>( + getMachineOpValue(MI, MO, Fixups, STI))); + return 0; +} + +#define ENABLE_INSTR_PREDICATE_VERIFIER +#include "VEGenMCCodeEmitter.inc" + +MCCodeEmitter *llvm::createVEMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx) { + return new VEMCCodeEmitter(MCII, Ctx); +} diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp new file mode 100644 index 0000000000000..a3ce3b3309be8 --- /dev/null +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp @@ -0,0 +1,225 @@ +//===-- VEMCExpr.cpp - VE specific MC expression classes ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the assembly expression modifiers +// accepted by the VE architecture (e.g. "%hi", "%lo", ...). 
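+// In VE assembly these modifiers appear as "@hi"/"@lo"-style suffixes on the
+// symbol (e.g. "sym@pc_lo"); see printVariantKindSuffix() below.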
+// +//===----------------------------------------------------------------------===// + +#include "VEMCExpr.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/BinaryFormat/ELF.h" + +using namespace llvm; + +#define DEBUG_TYPE "vemcexpr" + +const VEMCExpr *VEMCExpr::create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx) { + return new (Ctx) VEMCExpr(Kind, Expr); +} + +void VEMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { + + bool closeParen = printVariantKind(OS, Kind); + + const MCExpr *Expr = getSubExpr(); + Expr->print(OS, MAI); + + if (closeParen) + OS << ')'; + printVariantKindSuffix(OS, Kind); +} + +bool VEMCExpr::printVariantKind(raw_ostream &OS, VariantKind Kind) { + switch (Kind) { + case VK_VE_None: + case VK_VE_REFLONG: + return false; + + case VK_VE_HI32: + case VK_VE_LO32: + case VK_VE_PC_HI32: + case VK_VE_PC_LO32: + case VK_VE_GOT_HI32: + case VK_VE_GOT_LO32: + case VK_VE_GOTOFF_HI32: + case VK_VE_GOTOFF_LO32: + case VK_VE_PLT_HI32: + case VK_VE_PLT_LO32: + case VK_VE_TLS_GD_HI32: + case VK_VE_TLS_GD_LO32: + case VK_VE_TPOFF_HI32: + case VK_VE_TPOFF_LO32: + // Use suffix for these variant kinds + return false; + } + return true; +} + +void VEMCExpr::printVariantKindSuffix(raw_ostream &OS, VariantKind Kind) { + switch (Kind) { + case VK_VE_None: + case VK_VE_REFLONG: + break; + case VK_VE_HI32: + OS << "@hi"; + break; + case VK_VE_LO32: + OS << "@lo"; + break; + case VK_VE_PC_HI32: + OS << "@pc_hi"; + break; + case VK_VE_PC_LO32: + OS << "@pc_lo"; + break; + case VK_VE_GOT_HI32: + OS << "@got_hi"; + break; + case VK_VE_GOT_LO32: + OS << "@got_lo"; + break; + case VK_VE_GOTOFF_HI32: + OS << "@gotoff_hi"; + break; + case VK_VE_GOTOFF_LO32: + OS << "@gotoff_lo"; + break; + case VK_VE_PLT_HI32: + OS << "@plt_hi"; + break; + case VK_VE_PLT_LO32: + OS << "@plt_lo"; + break; + case VK_VE_TLS_GD_HI32: + OS << "@tls_gd_hi"; + break; + case VK_VE_TLS_GD_LO32: + OS << "@tls_gd_lo"; + break; + case VK_VE_TPOFF_HI32: + OS << "@tpoff_hi"; + break; + case VK_VE_TPOFF_LO32: + OS << "@tpoff_lo"; + break; + } +} + +VEMCExpr::VariantKind VEMCExpr::parseVariantKind(StringRef name) { + return StringSwitch<VEMCExpr::VariantKind>(name) + .Case("hi", VK_VE_HI32) + .Case("lo", VK_VE_LO32) + .Case("pc_hi", VK_VE_PC_HI32) + .Case("pc_lo", VK_VE_PC_LO32) + .Case("got_hi", VK_VE_GOT_HI32) + .Case("got_lo", VK_VE_GOT_LO32) + .Case("gotoff_hi", VK_VE_GOTOFF_HI32) + .Case("gotoff_lo", VK_VE_GOTOFF_LO32) + .Case("plt_hi", VK_VE_PLT_HI32) + .Case("plt_lo", VK_VE_PLT_LO32) + .Case("tls_gd_hi", VK_VE_TLS_GD_HI32) + .Case("tls_gd_lo", VK_VE_TLS_GD_LO32) + .Case("tpoff_hi", VK_VE_TPOFF_HI32) + .Case("tpoff_lo", VK_VE_TPOFF_LO32) + .Default(VK_VE_None); +} + +VE::Fixups VEMCExpr::getFixupKind(VEMCExpr::VariantKind Kind) { + switch (Kind) { + default: + llvm_unreachable("Unhandled VEMCExpr::VariantKind"); + case VK_VE_REFLONG: + return VE::fixup_ve_reflong; + case VK_VE_HI32: + return VE::fixup_ve_hi32; + case VK_VE_LO32: + return VE::fixup_ve_lo32; + case VK_VE_PC_HI32: + return VE::fixup_ve_pc_hi32; + case VK_VE_PC_LO32: + return VE::fixup_ve_pc_lo32; + case VK_VE_GOT_HI32: + return VE::fixup_ve_got_hi32; + case VK_VE_GOT_LO32: + return VE::fixup_ve_got_lo32; + case VK_VE_GOTOFF_HI32: + return VE::fixup_ve_gotoff_hi32; + case VK_VE_GOTOFF_LO32: + return VE::fixup_ve_gotoff_lo32; + case VK_VE_PLT_HI32: + return VE::fixup_ve_plt_hi32; + case VK_VE_PLT_LO32: + return 
VE::fixup_ve_plt_lo32; + case VK_VE_TLS_GD_HI32: + return VE::fixup_ve_tls_gd_hi32; + case VK_VE_TLS_GD_LO32: + return VE::fixup_ve_tls_gd_lo32; + case VK_VE_TPOFF_HI32: + return VE::fixup_ve_tpoff_hi32; + case VK_VE_TPOFF_LO32: + return VE::fixup_ve_tpoff_lo32; + } +} + +bool VEMCExpr::evaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout, + const MCFixup *Fixup) const { + return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup); +} + +static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { + switch (Expr->getKind()) { + case MCExpr::Target: + llvm_unreachable("Can't handle nested target expr!"); + break; + + case MCExpr::Constant: + break; + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr); + fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm); + fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm); + break; + } + + case MCExpr::SymbolRef: { + // We're known to be under a TLS fixup, so any symbol should be + // modified. There should be only one. + const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr); + cast<MCSymbolELF>(SymRef.getSymbol()).setType(ELF::STT_TLS); + break; + } + + case MCExpr::Unary: + fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm); + break; + } +} + +void VEMCExpr::visitUsedExpr(MCStreamer &Streamer) const { + Streamer.visitUsedExpr(*getSubExpr()); +} + +void VEMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { + switch (getKind()) { + default: + return; + case VK_VE_TLS_GD_HI32: + case VK_VE_TLS_GD_LO32: + case VK_VE_TPOFF_HI32: + case VK_VE_TPOFF_LO32: + break; + } + fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); +} diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h new file mode 100644 index 0000000000000..2b0c44576099d --- /dev/null +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h @@ -0,0 +1,95 @@ +//====- VEMCExpr.h - VE specific MC expression classes --------*- C++ -*-=====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes VE-specific MCExprs, used for modifiers like +// "%hi" or "%lo" etc., +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_MCTARGETDESC_VEMCEXPR_H +#define LLVM_LIB_TARGET_VE_MCTARGETDESC_VEMCEXPR_H + +#include "VEFixupKinds.h" +#include "llvm/MC/MCExpr.h" + +namespace llvm { + +class StringRef; +class VEMCExpr : public MCTargetExpr { +public: + enum VariantKind { + VK_VE_None, + VK_VE_REFLONG, + VK_VE_HI32, + VK_VE_LO32, + VK_VE_PC_HI32, + VK_VE_PC_LO32, + VK_VE_GOT_HI32, + VK_VE_GOT_LO32, + VK_VE_GOTOFF_HI32, + VK_VE_GOTOFF_LO32, + VK_VE_PLT_HI32, + VK_VE_PLT_LO32, + VK_VE_TLS_GD_HI32, + VK_VE_TLS_GD_LO32, + VK_VE_TPOFF_HI32, + VK_VE_TPOFF_LO32, + }; + +private: + const VariantKind Kind; + const MCExpr *Expr; + + explicit VEMCExpr(VariantKind Kind, const MCExpr *Expr) + : Kind(Kind), Expr(Expr) {} + +public: + /// @name Construction + /// @{ + + static const VEMCExpr *create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx); + /// @} + /// @name Accessors + /// @{ + + /// getOpcode - Get the kind of this expression. + VariantKind getKind() const { return Kind; } + + /// getSubExpr - Get the child of this expression. 
+ const MCExpr *getSubExpr() const { return Expr; } + + /// getFixupKind - Get the fixup kind of this expression. + VE::Fixups getFixupKind() const { return getFixupKind(Kind); } + + /// @} + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, + const MCFixup *Fixup) const override; + void visitUsedExpr(MCStreamer &Streamer) const override; + MCFragment *findAssociatedFragment() const override { + return getSubExpr()->findAssociatedFragment(); + } + + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Target; + } + + static bool classof(const VEMCExpr *) { return true; } + + static VariantKind parseVariantKind(StringRef name); + static bool printVariantKind(raw_ostream &OS, VariantKind Kind); + static void printVariantKindSuffix(raw_ostream &OS, VariantKind Kind); + static VE::Fixups getFixupKind(VariantKind Kind); +}; + +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp index b228617058a6a..a39cffc8f4a65 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp @@ -11,7 +11,8 @@ //===----------------------------------------------------------------------===// #include "VEMCTargetDesc.h" -#include "InstPrinter/VEInstPrinter.h" +#include "TargetInfo/VETargetInfo.h" +#include "VEInstPrinter.h" #include "VEMCAsmInfo.h" #include "VETargetStreamer.h" #include "llvm/MC/MCInstrInfo.h" @@ -35,7 +36,7 @@ static MCAsmInfo *createVEMCAsmInfo(const MCRegisterInfo &MRI, const Triple &TT, const MCTargetOptions &Options) { MCAsmInfo *MAI = new VEELFMCAsmInfo(TT); unsigned Reg = MRI.getDwarfRegNum(VE::SX11, true); - MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 0); + MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, Reg, 0); MAI->addInitialFrameState(Inst); return MAI; } @@ -93,6 +94,12 @@ extern "C" void LLVMInitializeVETargetMC() { // Register the MC subtarget info. TargetRegistry::RegisterMCSubtargetInfo(*T, createVEMCSubtargetInfo); + // Register the MC Code Emitter. + TargetRegistry::RegisterMCCodeEmitter(*T, createVEMCCodeEmitter); + + // Register the asm backend. + TargetRegistry::RegisterMCAsmBackend(*T, createVEAsmBackend); + // Register the object target streamer. TargetRegistry::RegisterObjectTargetStreamer(*T, createObjectTargetStreamer); diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h index 24a5c8209be27..7fb8a556aa749 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h @@ -22,7 +22,7 @@ class MCAsmBackend; class MCCodeEmitter; class MCContext; class MCInstrInfo; -class MCObjectWriter; +class MCObjectTargetWriter; class MCRegisterInfo; class MCSubtargetInfo; class MCTargetOptions; @@ -32,8 +32,12 @@ class StringRef; class raw_pwrite_stream; class raw_ostream; -Target &getTheVETarget(); - +MCCodeEmitter *createVEMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, MCContext &Ctx); +MCAsmBackend *createVEAsmBackend(const Target &T, const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, + const MCTargetOptions &Options); +std::unique_ptr<MCObjectTargetWriter> createVEELFObjectWriter(uint8_t OSABI); } // namespace llvm // Defines symbolic names for VE registers. 
This defines a mapping from diff --git a/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.cpp b/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.cpp index dfe94bbaaa4bf..29f5afb67ac14 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.cpp +++ b/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.cpp @@ -11,7 +11,7 @@ //===----------------------------------------------------------------------===// #include "VETargetStreamer.h" -#include "InstPrinter/VEInstPrinter.h" +#include "VEInstPrinter.h" #include "llvm/Support/FormattedStream.h" using namespace llvm; diff --git a/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp b/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp index be68fe7d24291..65bd142fe0dbb 100644 --- a/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp +++ b/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp @@ -6,8 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "VE.h" -#include "llvm/IR/Module.h" +#include "TargetInfo/VETargetInfo.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/llvm/lib/Target/VE/TargetInfo/VETargetInfo.h b/llvm/lib/Target/VE/TargetInfo/VETargetInfo.h new file mode 100644 index 0000000000000..7879e6f069a18 --- /dev/null +++ b/llvm/lib/Target/VE/TargetInfo/VETargetInfo.h @@ -0,0 +1,20 @@ +//===-- VETargetInfo.h - VE Target Implementation ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_TARGETINFO_VETARGETINFO_H +#define LLVM_LIB_TARGET_VE_TARGETINFO_VETARGETINFO_H + +namespace llvm { + +class Target; + +Target &getTheVETarget(); + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_VE_TARGETINFO_VETARGETINFO_H diff --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h index 9b61f2b63f36c..7ed7797cbb832 100644 --- a/llvm/lib/Target/VE/VE.h +++ b/llvm/lib/Target/VE/VE.h @@ -15,6 +15,7 @@ #define LLVM_LIB_TARGET_VE_VE_H #include "MCTargetDesc/VEMCTargetDesc.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" @@ -37,36 +38,50 @@ namespace llvm { // Enums corresponding to VE condition codes, both icc's and fcc's. These // values must be kept in sync with the ones in the .td file. 
namespace VECC { -enum CondCodes { +enum CondCode { // Integer comparison - CC_IG = 0, // Greater - CC_IL = 1, // Less + CC_IG = 0, // Greater + CC_IL = 1, // Less CC_INE = 2, // Not Equal CC_IEQ = 3, // Equal CC_IGE = 4, // Greater or Equal CC_ILE = 5, // Less or Equal // Floating point comparison - CC_AF = 0 + 6, // Never - CC_G = 1 + 6, // Greater - CC_L = 2 + 6, // Less - CC_NE = 3 + 6, // Not Equal - CC_EQ = 4 + 6, // Equal - CC_GE = 5 + 6, // Greater or Equal - CC_LE = 6 + 6, // Less or Equal - CC_NUM = 7 + 6, // Number - CC_NAN = 8 + 6, // NaN - CC_GNAN = 9 + 6, // Greater or NaN - CC_LNAN = 10 + 6, // Less or NaN + CC_AF = 0 + 6, // Never + CC_G = 1 + 6, // Greater + CC_L = 2 + 6, // Less + CC_NE = 3 + 6, // Not Equal + CC_EQ = 4 + 6, // Equal + CC_GE = 5 + 6, // Greater or Equal + CC_LE = 6 + 6, // Less or Equal + CC_NUM = 7 + 6, // Number + CC_NAN = 8 + 6, // NaN + CC_GNAN = 9 + 6, // Greater or NaN + CC_LNAN = 10 + 6, // Less or NaN CC_NENAN = 11 + 6, // Not Equal or NaN CC_EQNAN = 12 + 6, // Equal or NaN CC_GENAN = 13 + 6, // Greater or Equal or NaN CC_LENAN = 14 + 6, // Less or Equal or NaN - CC_AT = 15 + 6, // Always + CC_AT = 15 + 6, // Always + UNKNOWN +}; +} +// Enums corresponding to VE Rounding Mode. These values must be kept in +// sync with the ones in the .td file. +namespace VERD { +enum RoundingMode { + RD_NONE = 0, // According to PSW + RD_RZ = 8, // Round toward Zero + RD_RP = 9, // Round toward Plus infinity + RD_RM = 10, // Round toward Minus infinity + RD_RN = 11, // Round to Nearest (ties to Even) + RD_RA = 12, // Round to Nearest (ties to Away) + UNKNOWN }; } -inline static const char *VECondCodeToString(VECC::CondCodes CC) { +inline static const char *VECondCodeToString(VECC::CondCode CC) { switch (CC) { case VECC::CC_IG: return "gt"; case VECC::CC_IL: return "lt"; @@ -90,20 +105,252 @@ inline static const char *VECondCodeToString(VECC::CondCodes CC) { case VECC::CC_GENAN: return "genan"; case VECC::CC_LENAN: return "lenan"; case VECC::CC_AT: return "at"; + default: + llvm_unreachable("Invalid cond code"); + } +} + +inline static VECC::CondCode stringToVEICondCode(StringRef S) { + return StringSwitch<VECC::CondCode>(S) + .Case("gt", VECC::CC_IG) + .Case("lt", VECC::CC_IL) + .Case("ne", VECC::CC_INE) + .Case("eq", VECC::CC_IEQ) + .Case("ge", VECC::CC_IGE) + .Case("le", VECC::CC_ILE) + .Case("af", VECC::CC_AF) + .Case("at", VECC::CC_AT) + .Case("", VECC::CC_AT) + .Default(VECC::UNKNOWN); +} + +inline static VECC::CondCode stringToVEFCondCode(StringRef S) { + return StringSwitch<VECC::CondCode>(S) + .Case("gt", VECC::CC_G) + .Case("lt", VECC::CC_L) + .Case("ne", VECC::CC_NE) + .Case("eq", VECC::CC_EQ) + .Case("ge", VECC::CC_GE) + .Case("le", VECC::CC_LE) + .Case("num", VECC::CC_NUM) + .Case("nan", VECC::CC_NAN) + .Case("gtnan", VECC::CC_GNAN) + .Case("ltnan", VECC::CC_LNAN) + .Case("nenan", VECC::CC_NENAN) + .Case("eqnan", VECC::CC_EQNAN) + .Case("genan", VECC::CC_GENAN) + .Case("lenan", VECC::CC_LENAN) + .Case("af", VECC::CC_AF) + .Case("at", VECC::CC_AT) + .Case("", VECC::CC_AT) + .Default(VECC::UNKNOWN); +} + +inline static unsigned VECondCodeToVal(VECC::CondCode CC) { + switch (CC) { + case VECC::CC_IG: + return 1; + case VECC::CC_IL: + return 2; + case VECC::CC_INE: + return 3; + case VECC::CC_IEQ: + return 4; + case VECC::CC_IGE: + return 5; + case VECC::CC_ILE: + return 6; + case VECC::CC_AF: + return 0; + case VECC::CC_G: + return 1; + case VECC::CC_L: + return 2; + case VECC::CC_NE: + return 3; + case VECC::CC_EQ: + return 4; + case VECC::CC_GE: + 
return 5; + case VECC::CC_LE: + return 6; + case VECC::CC_NUM: + return 7; + case VECC::CC_NAN: + return 8; + case VECC::CC_GNAN: + return 9; + case VECC::CC_LNAN: + return 10; + case VECC::CC_NENAN: + return 11; + case VECC::CC_EQNAN: + return 12; + case VECC::CC_GENAN: + return 13; + case VECC::CC_LENAN: + return 14; + case VECC::CC_AT: + return 15; + default: + llvm_unreachable("Invalid cond code"); + } +} + +inline static VECC::CondCode VEValToCondCode(unsigned Val, bool IsInteger) { + if (IsInteger) { + switch (Val) { + case 0: + return VECC::CC_AF; + case 1: + return VECC::CC_IG; + case 2: + return VECC::CC_IL; + case 3: + return VECC::CC_INE; + case 4: + return VECC::CC_IEQ; + case 5: + return VECC::CC_IGE; + case 6: + return VECC::CC_ILE; + case 15: + return VECC::CC_AT; + } + } else { + switch (Val) { + case 0: + return VECC::CC_AF; + case 1: + return VECC::CC_G; + case 2: + return VECC::CC_L; + case 3: + return VECC::CC_NE; + case 4: + return VECC::CC_EQ; + case 5: + return VECC::CC_GE; + case 6: + return VECC::CC_LE; + case 7: + return VECC::CC_NUM; + case 8: + return VECC::CC_NAN; + case 9: + return VECC::CC_GNAN; + case 10: + return VECC::CC_LNAN; + case 11: + return VECC::CC_NENAN; + case 12: + return VECC::CC_EQNAN; + case 13: + return VECC::CC_GENAN; + case 14: + return VECC::CC_LENAN; + case 15: + return VECC::CC_AT; + } } llvm_unreachable("Invalid cond code"); } -// Different to Hi_32/Lo_32 the HI32 and LO32 functions -// preserve the correct numerical value -// on the LLVM data type for MC immediates (int64_t). -inline static int64_t HI32(int64_t imm) { - return (int32_t)(imm >> 32); +inline static const char *VERDToString(VERD::RoundingMode R) { + switch (R) { + case VERD::RD_NONE: + return ""; + case VERD::RD_RZ: + return ".rz"; + case VERD::RD_RP: + return ".rp"; + case VERD::RD_RM: + return ".rm"; + case VERD::RD_RN: + return ".rn"; + case VERD::RD_RA: + return ".ra"; + default: + llvm_unreachable("Invalid branch predicate"); + } } -inline static int64_t LO32(int64_t imm) { - return (int32_t)(imm); +inline static VERD::RoundingMode stringToVERD(StringRef S) { + return StringSwitch<VERD::RoundingMode>(S) + .Case("", VERD::RD_NONE) + .Case(".rz", VERD::RD_RZ) + .Case(".rp", VERD::RD_RP) + .Case(".rm", VERD::RD_RM) + .Case(".rn", VERD::RD_RN) + .Case(".ra", VERD::RD_RA) + .Default(VERD::UNKNOWN); } +inline static unsigned VERDToVal(VERD::RoundingMode R) { + switch (R) { + case VERD::RD_NONE: + case VERD::RD_RZ: + case VERD::RD_RP: + case VERD::RD_RM: + case VERD::RD_RN: + case VERD::RD_RA: + return static_cast<unsigned>(R); + default: + break; + } + llvm_unreachable("Invalid branch predicates"); +} + +inline static VERD::RoundingMode VEValToRD(unsigned Val) { + switch (Val) { + case static_cast<unsigned>(VERD::RD_NONE): + return VERD::RD_NONE; + case static_cast<unsigned>(VERD::RD_RZ): + return VERD::RD_RZ; + case static_cast<unsigned>(VERD::RD_RP): + return VERD::RD_RP; + case static_cast<unsigned>(VERD::RD_RM): + return VERD::RD_RM; + case static_cast<unsigned>(VERD::RD_RN): + return VERD::RD_RN; + case static_cast<unsigned>(VERD::RD_RA): + return VERD::RD_RA; + default: + break; + } + llvm_unreachable("Invalid branch predicates"); +} + +// MImm - Special immediate value of sequential bit stream of 0 or 1. +// See VEInstrInfo.td for details. 
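+// For example, (13)0 denotes 13 leading zeros followed by ones
+// (0x0007ffffffffffff), (5)1 denotes 5 leading ones followed by zeros
+// (0xf800000000000000), and (0)1 denotes 0.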
+inline static bool isMImmVal(uint64_t Val) {
+  if (Val == 0) {
+    // (0)1 is 0
+    return true;
+  }
+  if (isMask_64(Val)) {
+    // (m)0 patterns
+    return true;
+  }
+  // (m)1 patterns
+  return (Val & (1UL << 63)) && isShiftedMask_64(Val);
+}
+
+inline static bool isMImm32Val(uint32_t Val) {
+  if (Val == 0) {
+    // (0)1 is 0
+    return true;
+  }
+  if (isMask_32(Val)) {
+    // (m)0 patterns
+    return true;
+  }
+  // (m)1 patterns
+  return (Val & (1 << 31)) && isShiftedMask_32(Val);
+}
+
+inline unsigned M0(unsigned Val) { return Val + 64; }
+inline unsigned M1(unsigned Val) { return Val; }
+
 } // namespace llvm
 
 #endif
diff --git a/llvm/lib/Target/VE/VE.td b/llvm/lib/Target/VE/VE.td
index 7404321b1a063..617a6ea458b68 100644
--- a/llvm/lib/Target/VE/VE.td
+++ b/llvm/lib/Target/VE/VE.td
@@ -29,6 +29,13 @@ include "VEInstrInfo.td"
 
 def VEInstrInfo : InstrInfo;
 
+def VEAsmParser : AsmParser {
+  // Use both the VE register name matcher to accept "S0~S63" register names
+  // and the default register matcher to accept other registers.
+  let AllowDuplicateRegisterNames = 1;
+  let ShouldEmitMatchRegisterAltName = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // VE processors supported.
 //===----------------------------------------------------------------------===//
@@ -51,6 +58,7 @@ def VEAsmWriter : AsmWriter {
 def VE : Target {
   // Pull in Instruction Info:
   let InstructionSet = VEInstrInfo;
+  let AssemblyParsers = [VEAsmParser];
   let AssemblyWriters = [VEAsmWriter];
   let AllowRegisterRenaming = 1;
 }
diff --git a/llvm/lib/Target/VE/VEAsmPrinter.cpp b/llvm/lib/Target/VE/VEAsmPrinter.cpp
index 918f2a1acdaff..86e3aa3d3fa13 100644
--- a/llvm/lib/Target/VE/VEAsmPrinter.cpp
+++ b/llvm/lib/Target/VE/VEAsmPrinter.cpp
@@ -11,8 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "InstPrinter/VEInstPrinter.h"
+#include "MCTargetDesc/VEInstPrinter.h"
+#include "MCTargetDesc/VEMCExpr.h"
 #include "MCTargetDesc/VETargetStreamer.h"
+#include "TargetInfo/VETargetInfo.h"
 #include "VE.h"
 #include "VEInstrInfo.h"
 #include "VETargetMachine.h"
@@ -46,7 +48,14 @@ public:
 
   StringRef getPassName() const override { return "VE Assembly Printer"; }
 
-  void EmitInstruction(const MachineInstr *MI) override;
+  void lowerGETGOTAndEmitMCInsts(const MachineInstr *MI,
+                                 const MCSubtargetInfo &STI);
+  void lowerGETFunPLTAndEmitMCInsts(const MachineInstr *MI,
+                                    const MCSubtargetInfo &STI);
+  void lowerGETTLSAddrAndEmitMCInsts(const MachineInstr *MI,
+                                     const MCSubtargetInfo &STI);
+
+  void emitInstruction(const MachineInstr *MI) override;
 
   static const char *getRegisterName(unsigned RegNo) {
     return VEInstPrinter::getRegisterName(RegNo);
@@ -54,7 +63,265 @@ public:
 };
 } // end of anonymous namespace
 
-void VEAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+static MCOperand createVEMCOperand(VEMCExpr::VariantKind Kind, MCSymbol *Sym,
+                                   MCContext &OutContext) {
+  const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Sym, OutContext);
+  const VEMCExpr *expr = VEMCExpr::create(Kind, MCSym, OutContext);
+  return MCOperand::createExpr(expr);
+}
+
+static MCOperand createGOTRelExprOp(VEMCExpr::VariantKind Kind,
+                                    MCSymbol *GOTLabel, MCContext &OutContext) {
+  const MCSymbolRefExpr *GOT = MCSymbolRefExpr::create(GOTLabel, OutContext);
+  const VEMCExpr *expr = VEMCExpr::create(Kind, GOT, OutContext);
+  return MCOperand::createExpr(expr);
+}
+
+static void emitSIC(MCStreamer &OutStreamer, MCOperand &RD,
+                    const MCSubtargetInfo &STI) {
+  MCInst SICInst;
+  
SICInst.setOpcode(VE::SIC); + SICInst.addOperand(RD); + OutStreamer.emitInstruction(SICInst, STI); +} + +static void emitBSIC(MCStreamer &OutStreamer, MCOperand &R1, MCOperand &R2, + const MCSubtargetInfo &STI) { + MCInst BSICInst; + BSICInst.setOpcode(VE::BSICrii); + BSICInst.addOperand(R1); + BSICInst.addOperand(R2); + MCOperand czero = MCOperand::createImm(0); + BSICInst.addOperand(czero); + BSICInst.addOperand(czero); + OutStreamer.emitInstruction(BSICInst, STI); +} + +static void emitLEAzzi(MCStreamer &OutStreamer, MCOperand &Imm, MCOperand &RD, + const MCSubtargetInfo &STI) { + MCInst LEAInst; + LEAInst.setOpcode(VE::LEAzii); + LEAInst.addOperand(RD); + MCOperand CZero = MCOperand::createImm(0); + LEAInst.addOperand(CZero); + LEAInst.addOperand(CZero); + LEAInst.addOperand(Imm); + OutStreamer.emitInstruction(LEAInst, STI); +} + +static void emitLEASLzzi(MCStreamer &OutStreamer, MCOperand &Imm, MCOperand &RD, + const MCSubtargetInfo &STI) { + MCInst LEASLInst; + LEASLInst.setOpcode(VE::LEASLzii); + LEASLInst.addOperand(RD); + MCOperand CZero = MCOperand::createImm(0); + LEASLInst.addOperand(CZero); + LEASLInst.addOperand(CZero); + LEASLInst.addOperand(Imm); + OutStreamer.emitInstruction(LEASLInst, STI); +} + +static void emitLEAzii(MCStreamer &OutStreamer, MCOperand &RS1, MCOperand &Imm, + MCOperand &RD, const MCSubtargetInfo &STI) { + MCInst LEAInst; + LEAInst.setOpcode(VE::LEAzii); + LEAInst.addOperand(RD); + MCOperand CZero = MCOperand::createImm(0); + LEAInst.addOperand(CZero); + LEAInst.addOperand(RS1); + LEAInst.addOperand(Imm); + OutStreamer.emitInstruction(LEAInst, STI); +} + +static void emitLEASLrri(MCStreamer &OutStreamer, MCOperand &RS1, + MCOperand &RS2, MCOperand &Imm, MCOperand &RD, + const MCSubtargetInfo &STI) { + MCInst LEASLInst; + LEASLInst.setOpcode(VE::LEASLrri); + LEASLInst.addOperand(RD); + LEASLInst.addOperand(RS1); + LEASLInst.addOperand(RS2); + LEASLInst.addOperand(Imm); + OutStreamer.emitInstruction(LEASLInst, STI); +} + +static void emitBinary(MCStreamer &OutStreamer, unsigned Opcode, MCOperand &RS1, + MCOperand &Src2, MCOperand &RD, + const MCSubtargetInfo &STI) { + MCInst Inst; + Inst.setOpcode(Opcode); + Inst.addOperand(RD); + Inst.addOperand(RS1); + Inst.addOperand(Src2); + OutStreamer.emitInstruction(Inst, STI); +} + +static void emitANDrm(MCStreamer &OutStreamer, MCOperand &RS1, MCOperand &Imm, + MCOperand &RD, const MCSubtargetInfo &STI) { + emitBinary(OutStreamer, VE::ANDrm, RS1, Imm, RD, STI); +} + +static void emitHiLo(MCStreamer &OutStreamer, MCSymbol *GOTSym, + VEMCExpr::VariantKind HiKind, VEMCExpr::VariantKind LoKind, + MCOperand &RD, MCContext &OutContext, + const MCSubtargetInfo &STI) { + + MCOperand hi = createVEMCOperand(HiKind, GOTSym, OutContext); + MCOperand lo = createVEMCOperand(LoKind, GOTSym, OutContext); + emitLEAzzi(OutStreamer, lo, RD, STI); + MCOperand M032 = MCOperand::createImm(M0(32)); + emitANDrm(OutStreamer, RD, M032, RD, STI); + emitLEASLzzi(OutStreamer, hi, RD, STI); +} + +void VEAsmPrinter::lowerGETGOTAndEmitMCInsts(const MachineInstr *MI, + const MCSubtargetInfo &STI) { + MCSymbol *GOTLabel = + OutContext.getOrCreateSymbol(Twine("_GLOBAL_OFFSET_TABLE_")); + + const MachineOperand &MO = MI->getOperand(0); + MCOperand MCRegOP = MCOperand::createReg(MO.getReg()); + + if (!isPositionIndependent()) { + // Just load the address of GOT to MCRegOP. 
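// (Editorial aside: emitHiLo below expands to three instructions, a lea with
// the @lo part, an and with (32)0 to clear the upper half, and a lea.sl with
// the @hi part, which is how a 64-bit absolute address is materialized.)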
+ switch (TM.getCodeModel()) { + default: + llvm_unreachable("Unsupported absolute code model"); + case CodeModel::Small: + case CodeModel::Medium: + case CodeModel::Large: + emitHiLo(*OutStreamer, GOTLabel, VEMCExpr::VK_VE_HI32, + VEMCExpr::VK_VE_LO32, MCRegOP, OutContext, STI); + break; + } + return; + } + + MCOperand RegGOT = MCOperand::createReg(VE::SX15); // GOT + MCOperand RegPLT = MCOperand::createReg(VE::SX16); // PLT + + // lea %got, _GLOBAL_OFFSET_TABLE_@PC_LO(-24) + // and %got, %got, (32)0 + // sic %plt + // lea.sl %got, _GLOBAL_OFFSET_TABLE_@PC_HI(%got, %plt) + MCOperand cim24 = MCOperand::createImm(-24); + MCOperand loImm = + createGOTRelExprOp(VEMCExpr::VK_VE_PC_LO32, GOTLabel, OutContext); + emitLEAzii(*OutStreamer, cim24, loImm, MCRegOP, STI); + MCOperand M032 = MCOperand::createImm(M0(32)); + emitANDrm(*OutStreamer, MCRegOP, M032, MCRegOP, STI); + emitSIC(*OutStreamer, RegPLT, STI); + MCOperand hiImm = + createGOTRelExprOp(VEMCExpr::VK_VE_PC_HI32, GOTLabel, OutContext); + emitLEASLrri(*OutStreamer, RegGOT, RegPLT, hiImm, MCRegOP, STI); +} + +void VEAsmPrinter::lowerGETFunPLTAndEmitMCInsts(const MachineInstr *MI, + const MCSubtargetInfo &STI) { + const MachineOperand &MO = MI->getOperand(0); + MCOperand MCRegOP = MCOperand::createReg(MO.getReg()); + const MachineOperand &Addr = MI->getOperand(1); + MCSymbol *AddrSym = nullptr; + + switch (Addr.getType()) { + default: + llvm_unreachable("<unknown operand type>"); + return; + case MachineOperand::MO_MachineBasicBlock: + report_fatal_error("MBB is not supported yet"); + return; + case MachineOperand::MO_ConstantPoolIndex: + report_fatal_error("ConstantPool is not supported yet"); + return; + case MachineOperand::MO_ExternalSymbol: + AddrSym = GetExternalSymbolSymbol(Addr.getSymbolName()); + break; + case MachineOperand::MO_GlobalAddress: + AddrSym = getSymbol(Addr.getGlobal()); + break; + } + + if (!isPositionIndependent()) { + llvm_unreachable("Unsupported use of %plt in non-PIC code"); + return; + } + + MCOperand RegPLT = MCOperand::createReg(VE::SX16); // PLT + + // lea %dst, %plt_lo(func)(-24) + // and %dst, %dst, (32)0 + // sic %plt ; FIXME: is it safe to use %plt here?
+ // lea.sl %dst, %plt_hi(func)(%dst, %plt) + MCOperand cim24 = MCOperand::createImm(-24); + MCOperand loImm = + createGOTRelExprOp(VEMCExpr::VK_VE_PLT_LO32, AddrSym, OutContext); + emitLEAzii(*OutStreamer, cim24, loImm, MCRegOP, STI); + MCOperand M032 = MCOperand::createImm(M0(32)); + emitANDrm(*OutStreamer, MCRegOP, M032, MCRegOP, STI); + emitSIC(*OutStreamer, RegPLT, STI); + MCOperand hiImm = + createGOTRelExprOp(VEMCExpr::VK_VE_PLT_HI32, AddrSym, OutContext); + emitLEASLrri(*OutStreamer, MCRegOP, RegPLT, hiImm, MCRegOP, STI); +} + +void VEAsmPrinter::lowerGETTLSAddrAndEmitMCInsts(const MachineInstr *MI, + const MCSubtargetInfo &STI) { + const MachineOperand &Addr = MI->getOperand(0); + MCSymbol *AddrSym = nullptr; + + switch (Addr.getType()) { + default: + llvm_unreachable("<unknown operand type>"); + return; + case MachineOperand::MO_MachineBasicBlock: + report_fatal_error("MBB is not supported yet"); + return; + case MachineOperand::MO_ConstantPoolIndex: + report_fatal_error("ConstantPool is not supported yet"); + return; + case MachineOperand::MO_ExternalSymbol: + AddrSym = GetExternalSymbolSymbol(Addr.getSymbolName()); + break; + case MachineOperand::MO_GlobalAddress: + AddrSym = getSymbol(Addr.getGlobal()); + break; + } + + MCOperand RegLR = MCOperand::createReg(VE::SX10); // LR + MCOperand RegS0 = MCOperand::createReg(VE::SX0); // S0 + MCOperand RegS12 = MCOperand::createReg(VE::SX12); // S12 + MCSymbol *GetTLSLabel = OutContext.getOrCreateSymbol(Twine("__tls_get_addr")); + + // lea %s0, sym@tls_gd_lo(-24) + // and %s0, %s0, (32)0 + // sic %lr + // lea.sl %s0, sym@tls_gd_hi(%s0, %lr) + // lea %s12, __tls_get_addr@plt_lo(8) + // and %s12, %s12, (32)0 + // lea.sl %s12, __tls_get_addr@plt_hi(%s12, %lr) + // bsic %lr, (, %s12) + MCOperand cim24 = MCOperand::createImm(-24); + MCOperand loImm = + createGOTRelExprOp(VEMCExpr::VK_VE_TLS_GD_LO32, AddrSym, OutContext); + emitLEAzii(*OutStreamer, cim24, loImm, RegS0, STI); + MCOperand M032 = MCOperand::createImm(M0(32)); + emitANDrm(*OutStreamer, RegS0, M032, RegS0, STI); + emitSIC(*OutStreamer, RegLR, STI); + MCOperand hiImm = + createGOTRelExprOp(VEMCExpr::VK_VE_TLS_GD_HI32, AddrSym, OutContext); + emitLEASLrri(*OutStreamer, RegS0, RegLR, hiImm, RegS0, STI); + MCOperand ci8 = MCOperand::createImm(8); + MCOperand loImm2 = + createGOTRelExprOp(VEMCExpr::VK_VE_PLT_LO32, GetTLSLabel, OutContext); + emitLEAzii(*OutStreamer, ci8, loImm2, RegS12, STI); + emitANDrm(*OutStreamer, RegS12, M032, RegS12, STI); + MCOperand hiImm2 = + createGOTRelExprOp(VEMCExpr::VK_VE_PLT_HI32, GetTLSLabel, OutContext); + emitLEASLrri(*OutStreamer, RegS12, RegLR, hiImm2, RegS12, STI); + emitBSIC(*OutStreamer, RegLR, RegS12, STI); +} + +void VEAsmPrinter::emitInstruction(const MachineInstr *MI) { switch (MI->getOpcode()) { default: @@ -62,7 +329,17 @@ void VEAsmPrinter::EmitInstruction(const MachineInstr *MI) { case TargetOpcode::DBG_VALUE: // FIXME: Debug Value. 
return; + case VE::GETGOT: + lowerGETGOTAndEmitMCInsts(MI, getSubtargetInfo()); + return; + case VE::GETFUNPLT: + lowerGETFunPLTAndEmitMCInsts(MI, getSubtargetInfo()); + return; + case VE::GETTLSADDR: + lowerGETTLSAddrAndEmitMCInsts(MI, getSubtargetInfo()); + return; } + MachineBasicBlock::const_instr_iterator I = MI->getIterator(); MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); do { diff --git a/llvm/lib/Target/VE/VECallingConv.td b/llvm/lib/Target/VE/VECallingConv.td index 1a9097c79dd44..4f04dae884ab5 100644 --- a/llvm/lib/Target/VE/VECallingConv.td +++ b/llvm/lib/Target/VE/VECallingConv.td @@ -13,7 +13,77 @@ //===----------------------------------------------------------------------===// // Aurora VE //===----------------------------------------------------------------------===// +def CC_VE_C_Stack: CallingConv<[ + // float --> need special handling like below. + // 0 4 + // +------+------+ + // | empty| float| + // +------+------+ + CCIfType<[f32], CCCustom<"allocateFloat">>, + + // All of the rest are assigned to the stack in 8-byte aligned units. + CCAssignToStack<0, 8> +]>; + +def CC_VE : CallingConv<[ + // All arguments get passed in generic registers if there is space. + + // Promote i1/i8/i16 arguments to i32. + CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, + + // bool, char, int, enum, long --> generic integer 32 bit registers + CCIfType<[i32], CCAssignToRegWithShadow< + [SW0, SW1, SW2, SW3, SW4, SW5, SW6, SW7], + [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + + // float --> generic floating point 32 bit registers + CCIfType<[f32], CCAssignToRegWithShadow< + [SF0, SF1, SF2, SF3, SF4, SF5, SF6, SF7], + [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + + // long long/double --> generic 64 bit registers + CCIfType<[i64, f64], + CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + + // Alternatively, they are assigned to the stack in 8-byte aligned units. + CCDelegateTo<CC_VE_C_Stack> +]>; + +// All arguments are passed on the stack for varargs or non-prototyped +// functions. +def CC_VE2 : CallingConv<[ + // float --> need special handling like below. + // 0 4 + // +------+------+ + // | empty| float| + // +------+------+ + CCIfType<[f32], CCCustom<"allocateFloat">>, + + CCAssignToStack<0, 8> +]>; + +def RetCC_VE : CallingConv<[ + // Promote i1/i8/i16 arguments to i32.
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, + + // bool, char, int, enum, long --> generic integer 32 bit registers + CCIfType<[i32], CCAssignToRegWithShadow< + [SW0, SW1, SW2, SW3, SW4, SW5, SW6, SW7], + [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + + // float --> generic floating point 32 bit registers + CCIfType<[f32], CCAssignToRegWithShadow< + [SF0, SF1, SF2, SF3, SF4, SF5, SF6, SF7], + [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + + // long long/double --> generic 64 bit registers + CCIfType<[i64, f64], + CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, +]>; // Callee-saved registers def CSR : CalleeSavedRegs<(add (sequence "SX%u", 18, 33))>; def CSR_NoRegs : CalleeSavedRegs<(add)>; + +// PreserveAll (clobbers s62,s63) - used for ve_grow_stack +def CSR_preserve_all : CalleeSavedRegs<(add (sequence "SX%u", 0, 61))>; diff --git a/llvm/lib/Target/VE/VEFrameLowering.cpp b/llvm/lib/Target/VE/VEFrameLowering.cpp index ef5b5f0559111..8b10e6466123a 100644 --- a/llvm/lib/Target/VE/VEFrameLowering.cpp +++ b/llvm/lib/Target/VE/VEFrameLowering.cpp @@ -12,6 +12,7 @@ #include "VEFrameLowering.h" #include "VEInstrInfo.h" +#include "VEMachineFunctionInfo.h" #include "VESubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -29,12 +30,13 @@ using namespace llvm; VEFrameLowering::VEFrameLowering(const VESubtarget &ST) : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(16), 0, - Align(16)) {} + Align(16)), + STI(ST) {} void VEFrameLowering::emitPrologueInsns(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - int NumBytes, + uint64_t NumBytes, bool RequireFPUpdate) const { DebugLoc dl; @@ -46,24 +48,35 @@ void VEFrameLowering::emitPrologueInsns(MachineFunction &MF, // st %lr, 8(,%sp) // st %got, 24(,%sp) // st %plt, 32(,%sp) + // st %s17, 40(,%sp) iff this function is using s17 as BP // or %fp, 0, %sp - BuildMI(MBB, MBBI, dl, TII.get(VE::STSri)) + BuildMI(MBB, MBBI, dl, TII.get(VE::STrii)) .addReg(VE::SX11) .addImm(0) + .addImm(0) .addReg(VE::SX9); - BuildMI(MBB, MBBI, dl, TII.get(VE::STSri)) + BuildMI(MBB, MBBI, dl, TII.get(VE::STrii)) .addReg(VE::SX11) + .addImm(0) .addImm(8) .addReg(VE::SX10); - BuildMI(MBB, MBBI, dl, TII.get(VE::STSri)) + BuildMI(MBB, MBBI, dl, TII.get(VE::STrii)) .addReg(VE::SX11) + .addImm(0) .addImm(24) .addReg(VE::SX15); - BuildMI(MBB, MBBI, dl, TII.get(VE::STSri)) + BuildMI(MBB, MBBI, dl, TII.get(VE::STrii)) .addReg(VE::SX11) + .addImm(0) .addImm(32) .addReg(VE::SX16); + if (hasBP(MF)) + BuildMI(MBB, MBBI, dl, TII.get(VE::STrii)) + .addReg(VE::SX11) + .addImm(0) + .addImm(40) + .addReg(VE::SX17); BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX9) .addReg(VE::SX11) .addImm(0); @@ -72,7 +85,7 @@ void VEFrameLowering::emitPrologueInsns(MachineFunction &MF, void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - int NumBytes, + uint64_t NumBytes, bool RequireFPUpdate) const { DebugLoc dl; @@ -81,6 +94,7 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF, // Insert following codes here as epilogue // // or %sp, 0, %fp + // ld %s17, 40(,%sp) iff this function is using s17 as BP // ld %got, 32(,%sp) // ld %plt, 24(,%sp) // ld %lr, 8(,%sp) @@ -89,30 +103,40 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX11) .addReg(VE::SX9) .addImm(0); - BuildMI(MBB, MBBI, dl, TII.get(VE::LDSri), VE::SX16) + if (hasBP(MF)) + BuildMI(MBB, MBBI, dl, 
TII.get(VE::LDrii), VE::SX17) + .addReg(VE::SX11) + .addImm(0) + .addImm(40); + BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX16) .addReg(VE::SX11) + .addImm(0) .addImm(32); - BuildMI(MBB, MBBI, dl, TII.get(VE::LDSri), VE::SX15) + BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX15) .addReg(VE::SX11) + .addImm(0) .addImm(24); - BuildMI(MBB, MBBI, dl, TII.get(VE::LDSri), VE::SX10) + BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX10) .addReg(VE::SX11) + .addImm(0) .addImm(8); - BuildMI(MBB, MBBI, dl, TII.get(VE::LDSri), VE::SX9) + BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX9) .addReg(VE::SX11) + .addImm(0) .addImm(0); } void VEFrameLowering::emitSPAdjustment(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - int NumBytes) const { + int64_t NumBytes, + MaybeAlign MaybeAlign) const { DebugLoc dl; const VEInstrInfo &TII = *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo()); if (NumBytes >= -64 && NumBytes < 63) { - BuildMI(MBB, MBBI, dl, TII.get(VE::ADXri), VE::SX11) + BuildMI(MBB, MBBI, dl, TII.get(VE::ADDSLri), VE::SX11) .addReg(VE::SX11) .addImm(NumBytes); return; @@ -123,20 +147,28 @@ void VEFrameLowering::emitSPAdjustment(MachineFunction &MF, // lea %s13,%lo(NumBytes) // and %s13,%s13,(32)0 // lea.sl %sp,%hi(NumBytes)(%sp, %s13) - BuildMI(MBB, MBBI, dl, TII.get(VE::LEAzzi), VE::SX13) - .addImm(LO32(NumBytes)); - BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm0), VE::SX13) + BuildMI(MBB, MBBI, dl, TII.get(VE::LEAzii), VE::SX13) + .addImm(0) + .addImm(0) + .addImm(Lo_32(NumBytes)); + BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm), VE::SX13) .addReg(VE::SX13) - .addImm(32); + .addImm(M0(32)); BuildMI(MBB, MBBI, dl, TII.get(VE::LEASLrri), VE::SX11) .addReg(VE::SX11) .addReg(VE::SX13) - .addImm(HI32(NumBytes)); + .addImm(Hi_32(NumBytes)); + + if (MaybeAlign) { + // and %sp, %sp, Align-1 + BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm), VE::SX11) + .addReg(VE::SX11) + .addImm(M1(64 - Log2_64(MaybeAlign.valueOrOne().value()))); + } } void VEFrameLowering::emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - int NumBytes) const { + MachineBasicBlock::iterator MBBI) const { DebugLoc dl; const VEInstrInfo &TII = *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo()); @@ -175,11 +207,8 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineFrameInfo &MFI = MF.getFrameInfo(); - const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>(); - const VEInstrInfo &TII = - *static_cast<const VEInstrInfo *>(Subtarget.getInstrInfo()); - const VERegisterInfo &RegInfo = - *static_cast<const VERegisterInfo *>(Subtarget.getRegisterInfo()); + const VEInstrInfo &TII = *STI.getInstrInfo(); + const VERegisterInfo &RegInfo = *STI.getRegisterInfo(); MachineBasicBlock::iterator MBBI = MBB.begin(); // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. @@ -191,39 +220,22 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF, // rather than reporting an error, as would be sensible. This is // poor, but fixing that bogosity is going to be a large project. // For now, just see if it's lied, and report an error here. 
- if (!NeedsStackRealignment && MFI.getMaxAlignment() > getStackAlignment()) + if (!NeedsStackRealignment && MFI.getMaxAlign() > getStackAlign()) report_fatal_error("Function \"" + Twine(MF.getName()) + "\" required " "stack re-alignment, but LLVM couldn't handle it " "(probably because it has a dynamic alloca)."); // Get the number of bytes to allocate from the FrameInfo - int NumBytes = (int)MFI.getStackSize(); - // The VE ABI requires a reserved 176-byte area in the user's stack, starting - // at %sp + 16. This is for the callee Register Save Area (RSA). - // - // We therefore need to add that offset to the total stack size - // after all the stack objects are placed by - // PrologEpilogInserter calculateFrameObjectOffsets. However, since the stack - // needs to be aligned *after* the extra size is added, we need to disable - // calculateFrameObjectOffsets's built-in stack alignment, by having - // targetHandlesStackFrameRounding return true. - - // Add the extra call frame stack size, if needed. (This is the same - // code as in PrologEpilogInserter, but also gets disabled by - // targetHandlesStackFrameRounding) - if (MFI.adjustsStack() && hasReservedCallFrame(MF)) - NumBytes += MFI.getMaxCallFrameSize(); - - // Adds the VE subtarget-specific spill area to the stack - // size. Also ensures target-required alignment. - NumBytes = Subtarget.getAdjustedFrameSize(NumBytes); + uint64_t NumBytes = MFI.getStackSize(); + + // The VE ABI requires a reserved 176-byte area at the top of the stack, + // as described in VESubtarget.cpp, so we adjust for it here. + NumBytes = STI.getAdjustedFrameSize(NumBytes); // Finally, ensure that the size is sufficiently aligned for the // data on the stack. - if (MFI.getMaxAlignment() > 0) { - NumBytes = alignTo(NumBytes, MFI.getMaxAlignment()); - } + NumBytes = alignTo(NumBytes, MFI.getMaxAlign()); // Update stack size with corrected value. MFI.setStackSize(NumBytes); @@ -232,16 +244,25 @@ emitPrologueInsns(MF, MBB, MBBI, NumBytes, true); // Emit stack adjust instructions - emitSPAdjustment(MF, MBB, MBBI, -NumBytes); + MaybeAlign RuntimeAlign = + NeedsStackRealignment ? MaybeAlign(MFI.getMaxAlign()) : None; + emitSPAdjustment(MF, MBB, MBBI, -(int64_t)NumBytes, RuntimeAlign); + + if (hasBP(MF)) { + // Copy SP to BP. + BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX17) + .addReg(VE::SX11) + .addImm(0); + } // Emit stack extend instructions - emitSPExtend(MF, MBB, MBBI, -NumBytes); + emitSPExtend(MF, MBB, MBBI); - unsigned regFP = RegInfo.getDwarfRegNum(VE::SX9, true); + Register RegFP = RegInfo.getDwarfRegNum(VE::SX9, true); // Emit ".cfi_def_cfa_register 30".
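// (Editorial aside: this CFI directive tells the unwinder that, from this
// point on, the canonical frame address is computed from the frame pointer
// the prologue just established rather than from %sp.)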
unsigned CFIIndex = - MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, regFP)); + MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, RegFP)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); @@ -256,7 +277,7 @@ MachineBasicBlock::iterator VEFrameLowering::eliminateCallFramePseudoInstr( MachineBasicBlock::iterator I) const { if (!hasReservedCallFrame(MF)) { MachineInstr &MI = *I; - int Size = MI.getOperand(0).getImm(); + int64_t Size = MI.getOperand(0).getImm(); if (MI.getOpcode() == VE::ADJCALLSTACKDOWN) Size = -Size; @@ -272,20 +293,17 @@ void VEFrameLowering::emitEpilogue(MachineFunction &MF, DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo &MFI = MF.getFrameInfo(); - int NumBytes = (int)MFI.getStackSize(); + uint64_t NumBytes = MFI.getStackSize(); // Emit Epilogue instructions to restore %lr emitEpilogueInsns(MF, MBB, MBBI, NumBytes, true); } -bool VEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { - // Reserve call frame if there are no variable sized objects on the stack. - return !MF.getFrameInfo().hasVarSizedObjects(); -} - // hasFP - Return true if the specified function should have a dedicated frame -// pointer register. This is true if the function has variable sized allocas or -// if frame pointer elimination is disabled. +// pointer register. This is true if the function has variable sized allocas +// or if frame pointer elimination is disabled. For the case of VE, we don't +// implement the FP eliminator yet, but we return false from this function so +// that the generated code does not refer to %fp. bool VEFrameLowering::hasFP(const MachineFunction &MF) const { const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); @@ -295,13 +313,41 @@ MFI.isFrameAddressTaken(); } +bool VEFrameLowering::hasBP(const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + + return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF); +} + int VEFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg) const { - // Addressable stack objects are accessed using neg. offsets from - // %fp, or positive offsets from %sp. + Register &FrameReg) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const VERegisterInfo *RegInfo = STI.getRegisterInfo(); + const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); + bool isFixed = MFI.isFixedObjectIndex(FI); + int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI); - FrameReg = VE::SX11; // %sp - return FrameOffset + MF.getFrameInfo().getStackSize(); + + if (FuncInfo->isLeafProc()) { + // If there's a leaf proc, all offsets need to be %sp-based, + // because we haven't caused %fp to actually point to our frame. + FrameReg = VE::SX11; // %sp + return FrameOffset + MF.getFrameInfo().getStackSize(); + } + if (RegInfo->needsStackRealignment(MF) && !isFixed) { + // If there is dynamic stack realignment, all local object + // references need to be via %sp or %s17 (bp), to take account + // of the re-alignment. + if (hasBP(MF)) + FrameReg = VE::SX17; // %bp + else + FrameReg = VE::SX11; // %sp + return FrameOffset + MF.getFrameInfo().getStackSize(); + } + // Finally, default to using %fp.
+ FrameReg = RegInfo->getFrameRegister(MF); + return FrameOffset; } bool VEFrameLowering::isLeafProc(MachineFunction &MF) const { @@ -321,5 +367,8 @@ void VEFrameLowering::determineCalleeSaves(MachineFunction &MF, RegScavenger *RS) const { TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); - assert(isLeafProc(MF) && "TODO implement for non-leaf procs"); + if (isLeafProc(MF)) { + VEMachineFunctionInfo *MFI = MF.getInfo<VEMachineFunctionInfo>(); + MFI->setLeafProc(true); + } } diff --git a/llvm/lib/Target/VE/VEFrameLowering.h b/llvm/lib/Target/VE/VEFrameLowering.h index 97e31d21aa43b..b548d663c5043 100644 --- a/llvm/lib/Target/VE/VEFrameLowering.h +++ b/llvm/lib/Target/VE/VEFrameLowering.h @@ -28,23 +28,28 @@ public: void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitPrologueInsns(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, int NumBytes, + MachineBasicBlock::iterator MBBI, uint64_t NumBytes, bool RequireFPUpdate) const; void emitEpilogueInsns(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, int NumBytes, + MachineBasicBlock::iterator MBBI, uint64_t NumBytes, bool RequireFPUpdate) const; MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; - bool hasReservedCallFrame(const MachineFunction &MF) const override; + bool hasBP(const MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const override; + // VE reserves argument space always for call sites in the function + // immediately on entry of the current function. + bool hasReservedCallFrame(const MachineFunction &MF) const override { + return true; + } void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS = nullptr) const override; int getFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg) const override; + Register &FrameReg) const override; const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const override { @@ -58,10 +63,8 @@ public: return Offsets; } - /// targetHandlesStackFrameRounding - Returns true if the target is - /// responsible for rounding up the stack frame (probably at emitPrologue - /// time). - bool targetHandlesStackFrameRounding() const override { return true; } +protected: + const VESubtarget &STI; private: // Returns true if MF is a leaf procedure. @@ -69,11 +72,12 @@ private: // Emits code for adjusting SP in function prologue/epilogue. void emitSPAdjustment(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, int NumBytes) const; + MachineBasicBlock::iterator MBBI, int64_t NumBytes, + MaybeAlign MayAlign = MaybeAlign()) const; // Emits code for extending SP in function prologue/epilogue. void emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, int NumBytes) const; + MachineBasicBlock::iterator MBBI) const; }; } // namespace llvm diff --git a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp index 43030993efb90..f3d067d55fdb6 100644 --- a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp +++ b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp @@ -23,6 +23,105 @@ using namespace llvm; // Instruction Selector Implementation //===----------------------------------------------------------------------===// +/// Convert a DAG integer condition code to a VE ICC condition. 
+inline static VECC::CondCode intCondCode2Icc(ISD::CondCode CC) { + switch (CC) { + default: + llvm_unreachable("Unknown integer condition code!"); + case ISD::SETEQ: + return VECC::CC_IEQ; + case ISD::SETNE: + return VECC::CC_INE; + case ISD::SETLT: + return VECC::CC_IL; + case ISD::SETGT: + return VECC::CC_IG; + case ISD::SETLE: + return VECC::CC_ILE; + case ISD::SETGE: + return VECC::CC_IGE; + case ISD::SETULT: + return VECC::CC_IL; + case ISD::SETULE: + return VECC::CC_ILE; + case ISD::SETUGT: + return VECC::CC_IG; + case ISD::SETUGE: + return VECC::CC_IGE; + } +} + +/// Convert a DAG floating point condition code to a VE FCC condition. +inline static VECC::CondCode fpCondCode2Fcc(ISD::CondCode CC) { + switch (CC) { + default: + llvm_unreachable("Unknown fp condition code!"); + case ISD::SETFALSE: + return VECC::CC_AF; + case ISD::SETEQ: + case ISD::SETOEQ: + return VECC::CC_EQ; + case ISD::SETNE: + case ISD::SETONE: + return VECC::CC_NE; + case ISD::SETLT: + case ISD::SETOLT: + return VECC::CC_L; + case ISD::SETGT: + case ISD::SETOGT: + return VECC::CC_G; + case ISD::SETLE: + case ISD::SETOLE: + return VECC::CC_LE; + case ISD::SETGE: + case ISD::SETOGE: + return VECC::CC_GE; + case ISD::SETO: + return VECC::CC_NUM; + case ISD::SETUO: + return VECC::CC_NAN; + case ISD::SETUEQ: + return VECC::CC_EQNAN; + case ISD::SETUNE: + return VECC::CC_NENAN; + case ISD::SETULT: + return VECC::CC_LNAN; + case ISD::SETUGT: + return VECC::CC_GNAN; + case ISD::SETULE: + return VECC::CC_LENAN; + case ISD::SETUGE: + return VECC::CC_GENAN; + case ISD::SETTRUE: + return VECC::CC_AT; + } +} + +/// getImmVal - get immediate representation of integer value +inline static uint64_t getImmVal(const ConstantSDNode *N) { + return N->getSExtValue(); +} + +/// getFpImmVal - get immediate representation of floating point value +inline static uint64_t getFpImmVal(const ConstantFPSDNode *N) { + const APInt &Imm = N->getValueAPF().bitcastToAPInt(); + uint64_t Val = Imm.getZExtValue(); + if (Imm.getBitWidth() == 32) { + // Immediate values of float type are placed in the higher bits on VE. + Val <<= 32; + } + return Val; +} + +/// convMImmVal - Convert a mimm integer immediate value to target immediate. +inline static uint64_t convMImmVal(uint64_t Val) { + if (Val == 0) + return 0; // (0)1 + if (Val & (1UL << 63)) + return countLeadingOnes(Val); // (m)1 + return countLeadingZeros(Val) | 0x40; // (m)0 +} + //===--------------------------------------------------------------------===// /// VEDAGToDAGISel - VE specific code to select VE machine /// instructions for SelectionDAG operations. @@ -43,15 +142,172 @@ public: void Select(SDNode *N) override; + // Complex Pattern Selectors. + bool selectADDRrri(SDValue N, SDValue &Base, SDValue &Index, SDValue &Offset); + bool selectADDRrii(SDValue N, SDValue &Base, SDValue &Index, SDValue &Offset); + bool selectADDRzri(SDValue N, SDValue &Base, SDValue &Index, SDValue &Offset); + bool selectADDRzii(SDValue N, SDValue &Base, SDValue &Index, SDValue &Offset); + bool selectADDRri(SDValue N, SDValue &Base, SDValue &Offset); + StringRef getPassName() const override { return "VE DAG->DAG Pattern Instruction Selection"; } // Include the pieces autogenerated from the target description.
#include "VEGenDAGISel.inc" + +private: + SDNode *getGlobalBaseReg(); + + bool matchADDRrr(SDValue N, SDValue &Base, SDValue &Index); + bool matchADDRri(SDValue N, SDValue &Base, SDValue &Offset); +}; } // end anonymous namespace +bool VEDAGToDAGISel::selectADDRrri(SDValue Addr, SDValue &Base, SDValue &Index, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::FrameIndex) + return false; + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress || + Addr.getOpcode() == ISD::TargetGlobalTLSAddress) + return false; // direct calls. + + SDValue LHS, RHS; + if (matchADDRri(Addr, LHS, RHS)) { + if (matchADDRrr(LHS, Base, Index)) { + Offset = RHS; + return true; + } + // Return false to try selectADDRrii. + return false; + } + if (matchADDRrr(Addr, LHS, RHS)) { + if (matchADDRri(RHS, Index, Offset)) { + Base = LHS; + return true; + } + if (matchADDRri(LHS, Base, Offset)) { + Index = RHS; + return true; + } + Base = LHS; + Index = RHS; + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); + return true; + } + return false; // Let the reg+imm(=0) pattern catch this! +} + +bool VEDAGToDAGISel::selectADDRrii(SDValue Addr, SDValue &Base, SDValue &Index, + SDValue &Offset) { + if (matchADDRri(Addr, Base, Offset)) { + Index = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); + return true; + } + + Base = Addr; + Index = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); + return true; +} + +bool VEDAGToDAGISel::selectADDRzri(SDValue Addr, SDValue &Base, SDValue &Index, + SDValue &Offset) { + // Prefer ADDRrii. + return false; +} + +bool VEDAGToDAGISel::selectADDRzii(SDValue Addr, SDValue &Base, SDValue &Index, + SDValue &Offset) { + if (dyn_cast<FrameIndexSDNode>(Addr)) { + return false; + } + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress || + Addr.getOpcode() == ISD::TargetGlobalTLSAddress) + return false; // direct calls. + + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr)) { + if (isInt<32>(CN->getSExtValue())) { + Base = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); + Index = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); + Offset = + CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), MVT::i32); + return true; + } + } + return false; +} + +bool VEDAGToDAGISel::selectADDRri(SDValue Addr, SDValue &Base, + SDValue &Offset) { + if (matchADDRri(Addr, Base, Offset)) + return true; + + Base = Addr; + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); + return true; +} + +bool VEDAGToDAGISel::matchADDRrr(SDValue Addr, SDValue &Base, SDValue &Index) { + if (dyn_cast<FrameIndexSDNode>(Addr)) + return false; + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress || + Addr.getOpcode() == ISD::TargetGlobalTLSAddress) + return false; // direct calls. + + if (Addr.getOpcode() == ISD::ADD) { + ; // Nothing to do here. + } else if (Addr.getOpcode() == ISD::OR) { + // We want to look through a transform in InstCombine and DAGCombiner that + // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'. + if (!CurDAG->haveNoCommonBitsSet(Addr.getOperand(0), Addr.getOperand(1))) + return false; + } else { + return false; + } + + if (Addr.getOperand(0).getOpcode() == VEISD::Lo || + Addr.getOperand(1).getOpcode() == VEISD::Lo) + return false; // Let the LEASL patterns catch this!
+ Base = Addr.getOperand(0); + Index = Addr.getOperand(1); + return true; +} + +bool VEDAGToDAGISel::matchADDRri(SDValue Addr, SDValue &Base, SDValue &Offset) { + auto AddrTy = Addr->getValueType(0); + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), AddrTy); + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); + return true; + } + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress || + Addr.getOpcode() == ISD::TargetGlobalTLSAddress) + return false; // direct calls. + + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = cast<ConstantSDNode>(Addr.getOperand(1)); + if (isInt<32>(CN->getSExtValue())) { + if (FrameIndexSDNode *FIN = + dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) { + // Constant offset from frame ref. + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), AddrTy); + } else { + Base = Addr.getOperand(0); + } + Offset = + CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), MVT::i32); + return true; + } + } + return false; +} + void VEDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); if (N->isMachineOpcode()) { @@ -59,9 +315,22 @@ return; // Already selected. } + switch (N->getOpcode()) { + case VEISD::GLOBAL_BASE_REG: + ReplaceNode(N, getGlobalBaseReg()); + return; + } + SelectCode(N); } +SDNode *VEDAGToDAGISel::getGlobalBaseReg() { + Register GlobalBaseReg = Subtarget->getInstrInfo()->getGlobalBaseReg(MF); + return CurDAG + ->getRegister(GlobalBaseReg, TLI->getPointerTy(CurDAG->getDataLayout())) + .getNode(); +} + /// createVEISelDag - This pass converts a legalized DAG into a /// VE-specific DAG, ready for instruction scheduling. /// diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp index aa6c3c08bd756..ab720545dd831 100644 --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "VEISelLowering.h" +#include "MCTargetDesc/VEMCExpr.h" +#include "VEMachineFunctionInfo.h" #include "VERegisterInfo.h" #include "VETargetMachine.h" #include "llvm/ADT/StringSwitch.h" @@ -36,14 +38,37 @@ using namespace llvm; // Calling Convention Implementation //===----------------------------------------------------------------------===// +static bool allocateFloat(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + switch (LocVT.SimpleTy) { + case MVT::f32: { + // Allocate stack like below + // 0 4 + // +------+------+ + // | empty| float| + // +------+------+ + // Use align=8 for the dummy area to align the beginning of these two areas. + State.AllocateStack(4, Align(8)); // for empty area + // Use align=4 for the value to place it just after the dummy area.
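// (Editorial illustration: for the first f32 passed on the stack, the
// AllocateStack(4, Align(8)) call above reserves bytes 0-3 of a fresh 8-byte
// slot, and the AllocateStack(4, Align(4)) call below then returns offset 4,
// so the float value occupies bytes 4-7.)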
+ unsigned Offset = State.AllocateStack(4, Align(4)); // for float value area + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return true; + } + default: + return false; + } +} + #include "VEGenCallingConv.inc" bool VETargetLowering::CanLowerReturn( CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { - assert(!IsVarArg && "TODO implement var args"); - assert(Outs.empty() && "TODO implement return values"); - return true; // TODO support more than 'ret void' + CCAssignFn *RetCC = RetCC_VE; + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); + return CCInfo.CheckReturn(Outs, RetCC); } SDValue @@ -52,12 +77,57 @@ VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, SelectionDAG &DAG) const { - assert(!IsVarArg && "TODO implement var args"); - assert(Outs.empty() && "TODO implement return values"); - assert(OutVals.empty() && "TODO implement return values"); + // CCValAssign - represent the assignment of the return value to locations. + SmallVector<CCValAssign, 16> RVLocs; + + // CCState - Info about the registers and stack slot. + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); + // Analyze return values. + CCInfo.AnalyzeReturn(Outs, RetCC_VE); + + SDValue Flag; SmallVector<SDValue, 4> RetOps(1, Chain); + + // Copy the result values into the output registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + SDValue OutVal = OutVals[i]; + + // Integer return values must be sign or zero extended by the callee. + switch (VA.getLocInfo()) { + case CCValAssign::Full: + break; + case CCValAssign::SExt: + OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal); + break; + case CCValAssign::ZExt: + OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal); + break; + case CCValAssign::AExt: + OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal); + break; + default: + llvm_unreachable("Unknown loc info!"); + } + + assert(!VA.needsCustom() && "Unexpected custom lowering"); + + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag); + + // Guarantee that all emitted copies are stuck together with flags. + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. + if (Flag.getNode()) + RetOps.push_back(Flag); + return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps); } @@ -65,8 +135,89 @@ SDValue VETargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - assert(!IsVarArg && "TODO implement var args"); - assert(Ins.empty() && "TODO implement input arguments"); + MachineFunction &MF = DAG.getMachineFunction(); + + // Get the base offset of the incoming arguments stack space. + unsigned ArgsBaseOffset = 176; + // Get the size of the preserved arguments area + unsigned ArgsPreserved = 64; + + // Analyze arguments according to CC_VE. 
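// (Editorial aside: CC_VE, defined in VECallingConv.td above, tries the
// SW/SF/SX register rows first; whatever does not fit is delegated to
// CC_VE_C_Stack, which hands out 8-byte aligned stack slots.)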
+ SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext()); + // Allocate the preserved area first. + CCInfo.AllocateStack(ArgsPreserved, Align(8)); + // We already allocated the preserved area, so the stack offset computed + // by CC_VE would be correct now. + CCInfo.AnalyzeFormalArguments(Ins, CC_VE); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + if (VA.isRegLoc()) { + // This argument is passed in a register. + // All integer register arguments are promoted by the caller to i64. + + // Create a virtual register for the promoted live-in value. + unsigned VReg = + MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT())); + SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT()); + + // Get the high bits for i32 struct elements. + if (VA.getValVT() == MVT::i32 && VA.needsCustom()) + Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg, + DAG.getConstant(32, DL, MVT::i32)); + + // The caller promoted the argument, so insert an Assert?ext SDNode so we + // won't promote the value again in this function. + switch (VA.getLocInfo()) { + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg, + DAG.getValueType(VA.getValVT())); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg, + DAG.getValueType(VA.getValVT())); + break; + default: + break; + } + + // Truncate the register down to the argument type. + if (VA.isExtInLoc()) + Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg); + + InVals.push_back(Arg); + continue; + } + + // The registers are exhausted. This argument was passed on the stack. + assert(VA.isMemLoc()); + // The CC_VE_Full/Half functions compute stack offsets relative to the + // beginning of the arguments area at %fp+176. + unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset; + unsigned ValSize = VA.getValVT().getSizeInBits() / 8; + int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true); + InVals.push_back( + DAG.getLoad(VA.getValVT(), DL, Chain, + DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())), + MachinePointerInfo::getFixedStack(MF, FI))); + } + + if (!IsVarArg) + return Chain; + + // This function takes variable arguments, some of which may have been passed + // in registers %s0-%s8. + // + // The va_start intrinsic needs to know the offset to the first variable + // argument. + // TODO: need to calculate offset correctly once we support f128. + unsigned ArgOffset = ArgLocs.size() * 8; + VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); + // Skip the 176 bytes of register save area. 
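// (Editorial illustration with a hypothetical prototype f(int, double, ...):
// ArgLocs.size() == 2, so ArgOffset == 16 and the first variadic argument is
// expected at 16 + 176 = 192 bytes above the frame pointer.)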
+ FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset); + return Chain; } @@ -78,7 +229,7 @@ Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT, .Case("sp", VE::SX11) // Stack pointer .Case("fp", VE::SX9) // Frame pointer .Case("sl", VE::SX8) // Stack limit - .Case("lr", VE::SX10) // Link regsiter + .Case("lr", VE::SX10) // Link register .Case("tp", VE::SX14) // Thread pointer .Case("outer", VE::SX12) // Outer register .Case("info", VE::SX17) // Info area register @@ -96,6 +247,314 @@ // TargetLowering Implementation //===----------------------------------------------------------------------===// +SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + SDLoc DL = CLI.DL; + SDValue Chain = CLI.Chain; + auto PtrVT = getPointerTy(DAG.getDataLayout()); + + // VE target does not yet support tail call optimization. + CLI.IsTailCall = false; + + // Get the base offset of the outgoing arguments stack space. + unsigned ArgsBaseOffset = 176; + // Get the size of the preserved arguments area. + unsigned ArgsPreserved = 8 * 8u; + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext()); + // Allocate the preserved area first. + CCInfo.AllocateStack(ArgsPreserved, Align(8)); + // We already allocated the preserved area, so the stack offset computed + // by CC_VE would be correct now. + CCInfo.AnalyzeCallOperands(CLI.Outs, CC_VE); + + // VE requires both registers and the stack to be used for varargs or + // non-prototyped functions. + bool UseBoth = CLI.IsVarArg; + + // Analyze operands again if it is required to store BOTH. + SmallVector<CCValAssign, 16> ArgLocs2; + CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), + ArgLocs2, *DAG.getContext()); + if (UseBoth) + CCInfo2.AnalyzeCallOperands(CLI.Outs, CC_VE2); + + // Get the size of the outgoing arguments stack space requirement. + unsigned ArgsSize = CCInfo.getNextStackOffset(); + + // Keep stack frames 16-byte aligned. + ArgsSize = alignTo(ArgsSize, 16); + + // Adjust the stack pointer to make room for the arguments. + // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls + // with more than 6 arguments. + Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL); + + // Collect the set of registers to pass to the function and their values. + // This will be emitted as a sequence of CopyToReg nodes glued to the call + // instruction. + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + + // Collect chains from all the memory operations that copy arguments to the + // stack. They must follow the stack pointer adjustment above and precede the + // call instruction itself. + SmallVector<SDValue, 8> MemOpChains; + + // VE needs the address of the callee function in a register, + // so prepare to copy it to SX12 here. + + // If the callee is a GlobalAddress node (quite common, every direct call is) + // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. + // Likewise ExternalSymbol -> TargetExternalSymbol. + SDValue Callee = CLI.Callee; + + bool IsPICCall = isPositionIndependent(); + + // PC-relative references to external symbols should go through $stub. + // If so, we need to prepare GlobalBaseReg first.
+ const TargetMachine &TM = DAG.getTarget(); + const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); + const GlobalValue *GV = nullptr; + auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee); + if (CalleeG) + GV = CalleeG->getGlobal(); + bool Local = TM.shouldAssumeDSOLocal(*Mod, GV); + bool UsePlt = !Local; + MachineFunction &MF = DAG.getMachineFunction(); + + // Turn GlobalAddress/ExternalSymbol node into a value node + // containing the address of them here. + if (CalleeG) { + if (IsPICCall) { + if (UsePlt) + Subtarget->getInstrInfo()->getGlobalBaseReg(&MF); + Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0); + Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee); + } else { + Callee = + makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG); + } + } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) { + if (IsPICCall) { + if (UsePlt) + Subtarget->getInstrInfo()->getGlobalBaseReg(&MF); + Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0); + Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee); + } else { + Callee = + makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG); + } + } + + RegsToPass.push_back(std::make_pair(VE::SX12, Callee)); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue Arg = CLI.OutVals[i]; + + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown location info!"); + case CCValAssign::Full: + break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); + break; + } + + if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + if (!UseBoth) + continue; + VA = ArgLocs2[i]; + } + + assert(VA.isMemLoc()); + + // Create a store off the stack pointer for this argument. + SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT); + // The argument area starts at %fp+176 in the callee frame, + // %sp+176 in ours. + SDValue PtrOff = + DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL); + PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); + MemOpChains.push_back( + DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo())); + } + + // Emit all stores, make sure they occur before the call. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); + + // Build a sequence of CopyToReg nodes glued together with token chain and + // glue operands which copy the outgoing args into registers. The InGlue is + // necessary since all emitted instructions must be stuck together in order + // to pass the live physical registers. + SDValue InGlue; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first, + RegsToPass[i].second, InGlue); + InGlue = Chain.getValue(1); + } + + // Build the operands for the call instruction itself. + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + // Add a register mask operand representing the call-preserved registers. 
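// (Editorial aside: for the default convention this mask presumably reflects
// the CSR list in VECallingConv.td, i.e. %s18-%s33 are preserved across the
// call.)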
+ const VERegisterInfo *TRI = Subtarget->getRegisterInfo(); + const uint32_t *Mask = + TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + // Make sure the CopyToReg nodes are glued to the call instruction which + // consumes the registers. + if (InGlue.getNode()) + Ops.push_back(InGlue); + + // Now the call itself. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops); + InGlue = Chain.getValue(1); + + // Revert the stack pointer immediately after the call. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true), + DAG.getIntPtrConstant(0, DL, true), InGlue, DL); + InGlue = Chain.getValue(1); + + // Now extract the return values. This is more or less the same as + // LowerFormalArguments. + + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); + + // Set inreg flag manually for codegen generated library calls that + // return float. + if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB) + CLI.Ins[0].Flags.setInReg(); + + RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_VE); + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + unsigned Reg = VA.getLocReg(); + + // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can + // reside in the same register in the high and low bits. Reuse the + // CopyFromReg previous node to avoid duplicate copies. + SDValue RV; + if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1))) + if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg) + RV = Chain.getValue(0); + + // But usually we'll create a new CopyFromReg for a different register. + if (!RV.getNode()) { + RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue); + Chain = RV.getValue(1); + InGlue = Chain.getValue(2); + } + + // Get the high bits for i32 struct elements. + if (VA.getValVT() == MVT::i32 && VA.needsCustom()) + RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV, + DAG.getConstant(32, DL, MVT::i32)); + + // The callee promoted the return value, so insert an Assert?ext SDNode so + // we won't promote the value again in this function. + switch (VA.getLocInfo()) { + case CCValAssign::SExt: + RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV, + DAG.getValueType(VA.getValVT())); + break; + case CCValAssign::ZExt: + RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV, + DAG.getValueType(VA.getValVT())); + break; + default: + break; + } + + // Truncate the register down to the return value type. + if (VA.isExtInLoc()) + RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV); + + InVals.push_back(RV); + } + + return Chain; +} + +/// isFPImmLegal - Returns true if the target can instruction select the +/// specified FP immediate natively. If false, the legalizer will +/// materialize the FP immediate as a load from a constant pool. +bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const { + return VT == MVT::f32 || VT == MVT::f64; +} + +/// Determine if the target supports unaligned memory accesses. +/// +/// This function returns true if the target allows unaligned memory accesses +/// of the specified type in the given address space. 
If true, it also returns +/// whether the unaligned memory access is "fast" in the last argument by +/// reference. This is used, for example, in situations where an array +/// copy/move/set is converted to a sequence of store operations. Its use +/// helps to ensure that such replacements don't generate code that causes an +/// alignment error (trap) on the target machine. +bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned AddrSpace, + unsigned Align, + MachineMemOperand::Flags, + bool *Fast) const { + if (Fast) { + // It's always fast on VE + *Fast = true; + } + return true; +} + +bool VETargetLowering::hasAndNot(SDValue Y) const { + EVT VT = Y.getValueType(); + + // VE doesn't have a vector and-not instruction. + if (VT.isVector()) + return false; + + // VE allows different immediate values for X and Y where ~X & Y. + // Only simm7 works for X, and only mimm works for Y on VE. However, this + // function is used to check whether an immediate value is OK for an and-not + // instruction as both X and Y. Generating an additional instruction to + // retrieve an immediate value is no good since the purpose of this + // function is to convert a series of 3 instructions to another series of + // 3 instructions with better parallelism. Therefore, we return false + // for all immediate values now. + // FIXME: Change hasAndNot function to have two operands to make it work + // correctly with Aurora VE. + if (isa<ConstantSDNode>(Y)) + return false; + + // It's OK for generic registers. + return true; +} + VETargetLowering::VETargetLowering(const TargetMachine &TM, const VESubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { @@ -108,7 +567,87 @@ setBooleanVectorContents(ZeroOrOneBooleanContent); // Set up the register classes. + addRegisterClass(MVT::i32, &VE::I32RegClass); addRegisterClass(MVT::i64, &VE::I64RegClass); + addRegisterClass(MVT::f32, &VE::F32RegClass); + addRegisterClass(MVT::f64, &VE::I64RegClass); + + /// Load & Store { + for (MVT FPVT : MVT::fp_valuetypes()) { + for (MVT OtherFPVT : MVT::fp_valuetypes()) { + // Turn FP extload into load/fpextend + setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand); + + // Turn FP truncstore into trunc + store. + setTruncStoreAction(FPVT, OtherFPVT, Expand); + } + } + + // VE doesn't have i1 sign extending load + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + setTruncStoreAction(VT, MVT::i1, Expand); + } + /// } Load & Store + + // Custom legalize address nodes into LO/HI parts. + MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); + setOperationAction(ISD::BlockAddress, PtrVT, Custom); + setOperationAction(ISD::GlobalAddress, PtrVT, Custom); + setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); + + /// VAARG handling { + setOperationAction(ISD::VASTART, MVT::Other, Custom); + // VAARG needs to be lowered to an access with 8-byte alignment. + setOperationAction(ISD::VAARG, MVT::Other, Custom); + // Use the default implementation.
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + /// } VAARG handling + + /// Stack { + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); + /// } Stack + + /// Int Ops { + for (MVT IntVT : {MVT::i32, MVT::i64}) { + // VE has no REM or DIVREM operations. + setOperationAction(ISD::UREM, IntVT, Expand); + setOperationAction(ISD::SREM, IntVT, Expand); + setOperationAction(ISD::SDIVREM, IntVT, Expand); + setOperationAction(ISD::UDIVREM, IntVT, Expand); + + setOperationAction(ISD::CTTZ, IntVT, Expand); + setOperationAction(ISD::ROTL, IntVT, Expand); + setOperationAction(ISD::ROTR, IntVT, Expand); + + // Use isel patterns for i32 and i64 + setOperationAction(ISD::BSWAP, IntVT, Legal); + setOperationAction(ISD::CTLZ, IntVT, Legal); + setOperationAction(ISD::CTPOP, IntVT, Legal); + + // Use isel patterns for i64, Promote i32 + LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal; + setOperationAction(ISD::BITREVERSE, IntVT, Act); + } + /// } Int Ops + + /// Conversion { + // VE doesn't have instructions for fp<->uint, so expand them by llvm + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64 + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64 + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + + // fp16 not supported + for (MVT FPVT : MVT::fp_valuetypes()) { + setOperationAction(ISD::FP16_TO_FP, FPVT, Expand); + setOperationAction(ISD::FP_TO_FP16, FPVT, Expand); + } + /// } Conversion setStackPointerRegisterToSaveRestore(VE::SX11); @@ -122,16 +661,316 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM, } const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const { +#define TARGET_NODE_CASE(NAME) \ + case VEISD::NAME: \ + return "VEISD::" #NAME; switch ((VEISD::NodeType)Opcode) { case VEISD::FIRST_NUMBER: break; - case VEISD::RET_FLAG: - return "VEISD::RET_FLAG"; + TARGET_NODE_CASE(Lo) + TARGET_NODE_CASE(Hi) + TARGET_NODE_CASE(GETFUNPLT) + TARGET_NODE_CASE(GETSTACKTOP) + TARGET_NODE_CASE(GETTLSADDR) + TARGET_NODE_CASE(CALL) + TARGET_NODE_CASE(RET_FLAG) + TARGET_NODE_CASE(GLOBAL_BASE_REG) } +#undef TARGET_NODE_CASE return nullptr; } EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, EVT VT) const { - return MVT::i64; + return MVT::i32; +} + +// Convert to a target node and set target flags. +SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF, + SelectionDAG &DAG) const { + if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) + return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), + GA->getValueType(0), GA->getOffset(), TF); + + if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) + return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(), + 0, TF); + + if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) + return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0), + TF); + + llvm_unreachable("Unhandled address SDNode"); +} + +// Split Op into high and low parts according to HiTF and LoTF. +// Return an ADD node combining the parts. 
+SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG)); + SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG)); + return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo); +} + +// Build SDNodes for producing an address from a GlobalAddress, ConstantPool, +// or ExternalSymbol SDNode. +SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT PtrVT = Op.getValueType(); + + // Handle PIC mode first. VE needs a got load for every variable! + if (isPositionIndependent()) { + // GLOBAL_BASE_REG codegen'ed with call. Inform MFI that this + // function has calls. + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + MFI.setHasCalls(true); + auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op); + + if (isa<ConstantPoolSDNode>(Op) || + (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) { + // Create following instructions for local linkage PIC code. + // lea %s35, %gotoff_lo(.LCPI0_0) + // and %s35, %s35, (32)0 + // lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35) + // adds.l %s35, %s15, %s35 ; %s15 is GOT + // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15) + SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32, + VEMCExpr::VK_VE_GOTOFF_LO32, DAG); + SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT); + return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo); + } + // Create following instructions for not local linkage PIC code. + // lea %s35, %got_lo(.LCPI0_0) + // and %s35, %s35, (32)0 + // lea.sl %s35, %got_hi(.LCPI0_0)(%s35) + // adds.l %s35, %s15, %s35 ; %s15 is GOT + // ld %s35, (,%s35) + // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15) + SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32, + VEMCExpr::VK_VE_GOT_LO32, DAG); + SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT); + SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo); + return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr, + MachinePointerInfo::getGOT(DAG.getMachineFunction())); + } + + // This is one of the absolute code models. + switch (getTargetMachine().getCodeModel()) { + default: + llvm_unreachable("Unsupported absolute code model"); + case CodeModel::Small: + case CodeModel::Medium: + case CodeModel::Large: + // abs64. + return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG); + } +} + +/// Custom Lower { + +SDValue VETargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { + return makeAddress(Op, DAG); +} + +SDValue VETargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { + return makeAddress(Op, DAG); +} + +SDValue +VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + + // Generate the following code: + // t1: ch,glue = callseq_start t0, 0, 0 + // t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1 + // t3: ch,glue = callseq_end t2, 0, 0, t2:2 + // t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1 + SDValue Label = withTargetFlags(Op, 0, DAG); + EVT PtrVT = Op.getValueType(); + + // Lowering the machine isd will make sure everything is in the right + // location. 
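+  // Editorial sketch: the sequence below is morally a libcall, behaving like
+  //   void *addr = __tls_get_addr(&sym);   // result comes back in %sx0
+  // (helper name per the usual general-dynamic TLS convention, an assumption;
+  // this code only emits the GETTLSADDR pseudo plus its call sequence).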
+ SDValue Chain = DAG.getEntryNode(); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask( + DAG.getMachineFunction(), CallingConv::C); + Chain = DAG.getCALLSEQ_START(Chain, 64, 0, dl); + SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)}; + Chain = DAG.getNode(VEISD::GETTLSADDR, dl, NodeTys, Args); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, dl, true), + DAG.getIntPtrConstant(0, dl, true), + Chain.getValue(1), dl); + Chain = DAG.getCopyFromReg(Chain, dl, VE::SX0, PtrVT, Chain.getValue(1)); + + // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls. + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + MFI.setHasCalls(true); + + // Also generate code to prepare a GOT register if it is PIC. + if (isPositionIndependent()) { + MachineFunction &MF = DAG.getMachineFunction(); + Subtarget->getInstrInfo()->getGlobalBaseReg(&MF); + } + + return Chain; +} + +SDValue VETargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + // The current implementation of nld (2.26) doesn't allow local exec model + // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always + // generate the general dynamic model code sequence. + // + // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf + return LowerToTLSGeneralDynamicModel(Op, DAG); +} + +SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + + // Need frame address to find the address of VarArgsFrameIndex. + MF.getFrameInfo().setFrameAddressIsTaken(true); + + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument. + SDLoc DL(Op); + SDValue Offset = + DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT), + DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL)); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1), + MachinePointerInfo(SV)); +} + +SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + EVT VT = Node->getValueType(0); + SDValue InChain = Node->getOperand(0); + SDValue VAListPtr = Node->getOperand(1); + EVT PtrVT = VAListPtr.getValueType(); + const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); + SDLoc DL(Node); + SDValue VAList = + DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV)); + SDValue Chain = VAList.getValue(1); + SDValue NextPtr; + + if (VT == MVT::f32) { + // float --> need special handling like below. + // 0 4 + // +------+------+ + // | empty| float| + // +------+------+ + // Increment the pointer, VAList, by 8 to the next vaarg. + NextPtr = + DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL)); + // Then, adjust VAList. + unsigned InternalOffset = 4; + VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getConstant(InternalOffset, DL, PtrVT)); + } else { + // Increment the pointer, VAList, by 8 to the next vaarg. + NextPtr = + DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL)); + } + + // Store the incremented VAList to the legalized pointer. 
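+  // Net effect for f32 (a sketch of the sequence built above and below):
+  //   char *p = *ap;                 // load the current va_list pointer
+  //   *ap = p + 8;                   // every slot is 8 bytes wide
+  //   float f = *(float *)(p + 4);   // value sits in the upper half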
+ InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV)); + + // Load the actual argument out of the pointer VAList. + // We can't count on greater alignment than the word size. + return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(), + std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8); +} + +SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { + // Generate following code. + // (void)__llvm_grow_stack(size); + // ret = GETSTACKTOP; // pseudo instruction + SDLoc DL(Op); + + // Get the inputs. + SDNode *Node = Op.getNode(); + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + MaybeAlign Alignment(Op.getConstantOperandVal(2)); + EVT VT = Node->getValueType(0); + + // Chain the dynamic stack allocation so that it doesn't modify the stack + // pointer when other instructions are using the stack. + Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL); + + const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); + Align StackAlign = TFI.getStackAlign(); + bool NeedsAlign = Alignment.valueOrOne() > StackAlign; + + // Prepare arguments + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Size; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Args.push_back(Entry); + if (NeedsAlign) { + Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT); + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Args.push_back(Entry); + } + Type *RetTy = Type::getVoidTy(*DAG.getContext()); + + EVT PtrVT = Op.getValueType(); + SDValue Callee; + if (NeedsAlign) { + Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0); + } else { + Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0); + } + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(DL) + .setChain(Chain) + .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args)) + .setDiscardResult(true); + std::pair<SDValue, SDValue> pair = LowerCallTo(CLI); + Chain = pair.second; + SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain); + if (NeedsAlign) { + Result = DAG.getNode(ISD::ADD, DL, VT, Result, + DAG.getConstant((Alignment->value() - 1ULL), DL, VT)); + Result = DAG.getNode(ISD::AND, DL, VT, Result, + DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT)); + } + // Chain = Result.getValue(1); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true), + DAG.getIntPtrConstant(0, DL, true), SDValue(), DL); + + SDValue Ops[2] = {Result, Chain}; + return DAG.getMergeValues(Ops, DL); +} + +SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + default: + llvm_unreachable("Should not custom lower this!"); + case ISD::BlockAddress: + return LowerBlockAddress(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: + return lowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG); + case ISD::GlobalTLSAddress: + return LowerGlobalTLSAddress(Op, DAG); + case ISD::VASTART: + return LowerVASTART(Op, DAG); + case ISD::VAARG: + return LowerVAARG(Op, DAG); + } } +/// } Custom Lower diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h index 39b3610a0c3a1..4633220efaa18 100644 --- a/llvm/lib/Target/VE/VEISelLowering.h +++ b/llvm/lib/Target/VE/VEISelLowering.h @@ -23,7 +23,18 @@ class VESubtarget; namespace VEISD { enum NodeType : unsigned { FIRST_NUMBER = ISD::BUILTIN_OP_END, - RET_FLAG, // Return with a 
flag operand.
+
+  Hi,
+  Lo, // Hi/Lo operations, typically on a global address.
+
+  GETFUNPLT,       // load function address through %plt instruction
+  GETTLSADDR,      // load address for TLS access
+  GETSTACKTOP,     // retrieve address of stack top (first address of
+                   // locals and temporaries)
+
+  CALL,            // A call instruction.
+  RET_FLAG,        // Return with a flag operand.
+  GLOBAL_BASE_REG, // Global base reg for PIC.
 };
 }
 
@@ -34,6 +45,9 @@ public:
   VETargetLowering(const TargetMachine &TM, const VESubtarget &STI);
 
   const char *getTargetNodeName(unsigned Opcode) const override;
+  MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
+    return MVT::i32;
+  }
 
   Register getRegisterByName(const char *RegName, LLT VT,
                              const MachineFunction &MF) const override;
@@ -48,6 +62,9 @@ public:
                                const SDLoc &dl, SelectionDAG &DAG,
                                SmallVectorImpl<SDValue> &InVals) const override;
 
+  SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+                    SmallVectorImpl<SDValue> &InVals) const override;
+
   bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                       bool isVarArg,
                       const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
@@ -56,6 +73,36 @@ public:
                       const SmallVectorImpl<ISD::OutputArg> &Outs,
                       const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
                       SelectionDAG &DAG) const override;
+
+  /// Custom Lower {
+  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+
+  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+  /// } Custom Lower
+
+  SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;
+  SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
+                       SelectionDAG &DAG) const;
+  SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const;
+
+  bool isFPImmLegal(const APFloat &Imm, EVT VT,
+                    bool ForCodeSize) const override;
+  /// Returns true if the target allows unaligned memory accesses of the
+  /// specified type.
+  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
+                                      MachineMemOperand::Flags Flags,
+                                      bool *Fast) const override;
+
+  // Block s/udiv lowering for now
+  bool isIntDivCheap(EVT VT, AttributeList Attr) const override { return true; }
+
+  bool hasAndNot(SDValue Y) const override;
 };
 } // namespace llvm
 
diff --git a/llvm/lib/Target/VE/VEInstrFormats.td b/llvm/lib/Target/VE/VEInstrFormats.td
index a8d3e786ba891..0c02411ff916d 100644
--- a/llvm/lib/Target/VE/VEInstrFormats.td
+++ b/llvm/lib/Target/VE/VEInstrFormats.td
@@ -6,6 +6,20 @@
 //
 //===----------------------------------------------------------------------===//
 
+// SX-Aurora is little endian, but its instructions are encoded in a slightly
+// different manner. Therefore, we need to translate the position of each
+// bitfield described in the ISA documentation as shown below.
+// +// ISA | InstrFormats.td +// --------------------------- +// 0-7 => 63-56 +// 8 => 55 +// 32-63 => 31-0 + +//===----------------------------------------------------------------------===// +// Instruction Format +//===----------------------------------------------------------------------===// + class InstVE<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction { field bits<64> Inst; @@ -14,7 +28,7 @@ class InstVE<dag outs, dag ins, string asmstr, list<dag> pattern> let Size = 8; bits<8> op; - let Inst{0-7} = op; + let Inst{63-56} = op; dag OutOperandList = outs; dag InOperandList = ins; @@ -25,50 +39,154 @@ class InstVE<dag outs, dag ins, string asmstr, list<dag> pattern> field bits<64> SoftFail = 0; } -class RM<bits<8>opVal, dag outs, dag ins, string asmstr, list<dag> pattern=[]> +//----------------------------------------------------------------------------- +// Section 5.1 RM Type +// +// RM type has sx, sy, sz, and imm32. +// The effective address is generated by sz + sy + imm32. +//----------------------------------------------------------------------------- + +class RM<bits<8>opVal, dag outs, dag ins, string asmstr, list<dag> pattern = []> : InstVE<outs, ins, asmstr, pattern> { bits<1> cx = 0; bits<7> sx; - bits<1> cy = 0; + bits<1> cy = 1; + bits<7> sz; // defines sz prior to sy to assign from sz + bits<7> sy; + bits<1> cz = 1; + bits<32> imm32; + let op = opVal; + let Inst{55} = cx; + let Inst{54-48} = sx; + let Inst{47} = cy; + let Inst{46-40} = sy; + let Inst{39} = cz; + let Inst{38-32} = sz; + let Inst{31-0} = imm32; +} + +//----------------------------------------------------------------------------- +// Section 5.2 RRM Type +// +// RRM type is identical to RM, but the effective address is generated +// by sz + imm32. The sy field is used by other purposes. +//----------------------------------------------------------------------------- + +class RRM<bits<8>opVal, dag outs, dag ins, string asmstr, + list<dag> pattern = []> + : RM<opVal, outs, ins, asmstr, pattern>; + +// RRMHM type is to load/store host memory +// It is similar to RRM and not use sy. +class RRMHM<bits<8>opVal, dag outs, dag ins, string asmstr, + list<dag> pattern = []> + : RRM<opVal, outs, ins, asmstr, pattern> { + bits<2> ry = 0; + let cy = 0; + let sy{6-2} = 0; + let sy{1-0} = ry; +} + +//----------------------------------------------------------------------------- +// Section 5.3 CF Type +// +// CF type is used for control flow. +//----------------------------------------------------------------------------- + +class CF<bits<8>opVal, dag outs, dag ins, string asmstr, list<dag> pattern = []> + : InstVE<outs, ins, asmstr, pattern> { + bits<1> cx = 0; + bits<1> cx2 = 0; + bits<2> bpf = 0; + bits<4> cf; + bits<1> cy = 1; bits<7> sy; - bits<1> cz = 0; + bits<1> cz = 1; bits<7> sz; - bits<32> imm32 = 0; + bits<32> imm32; let op = opVal; - let Inst{15} = cx; - let Inst{14-8} = sx; - let Inst{23} = cy; - let Inst{22-16} = sy; - let Inst{31} = cz; - let Inst{30-24} = sz; - let Inst{63-32} = imm32; + let Inst{55} = cx; + let Inst{54} = cx2; + let Inst{53-52} = bpf; + let Inst{51-48} = cf; + let Inst{47} = cy; + let Inst{46-40} = sy; + let Inst{39} = cz; + let Inst{38-32} = sz; + let Inst{31-0} = imm32; } -class RR<bits<8>opVal, dag outs, dag ins, string asmstr> - : RM<opVal, outs, ins, asmstr> { +//----------------------------------------------------------------------------- +// Section 5.4 RR Type +// +// RR type is for generic arithmetic instructions. 
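+// For example (a sketch, using a made-up opcode value): an RR instruction
+// with op=0x4c, sx=1, sy=2, sz=3 is laid out as Inst{63-56}=0x4c,
+// Inst{54-48}=1, Inst{46-40}=2, Inst{38-32}=3, per the bit translation
+// table at the top of this file.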
+//----------------------------------------------------------------------------- + +class RR<bits<8>opVal, dag outs, dag ins, string asmstr, list<dag> pattern = []> + : InstVE<outs, ins, asmstr, pattern> { + bits<1> cx = 0; + bits<7> sx; + bits<1> cy = 1; + bits<7> sy; + bits<1> cz = 1; + bits<7> sz; // m field places at the top sz field + bits<8> vx = 0; + bits<8> vz = 0; bits<1> cw = 0; bits<1> cw2 = 0; bits<4> cfw = 0; - let imm32{0-23} = 0; - let imm32{24} = cw; - let imm32{25} = cw2; - let imm32{26-27} = 0; - let imm32{28-31} = cfw; + let op = opVal; + let Inst{55} = cx; + let Inst{54-48} = sx; + let Inst{47} = cy; + let Inst{46-40} = sy; + let Inst{39} = cz; + let Inst{38-32} = sz; + let Inst{31-24} = vx; + let Inst{23-16} = 0; + let Inst{15-8} = vz; + let Inst{7} = cw; + let Inst{6} = cw2; + let Inst{5-4} = 0; + let Inst{3-0} = cfw; } -class CF<bits<8>opVal, dag outs, dag ins, string asmstr, list<dag> pattern=[]> - : RM<opVal, outs, ins, asmstr, pattern> { - bits<1> cx2; - bits<2> bpf; - bits<4> cf; - let cx = 0; - let sx{6} = cx2; - let sx{5-4} = bpf; - let sx{3-0} = cf; +// RRFENCE type is special RR type for a FENCE instruction. +class RRFENCE<bits<8>opVal, dag outs, dag ins, string asmstr, + list<dag> pattern = []> + : InstVE<outs, ins, asmstr, pattern> { + bits<1> avo = 0; + bits<1> lf = 0; + bits<1> sf = 0; + bits<1> c2 = 0; + bits<1> c1 = 0; + bits<1> c0 = 0; + let op = opVal; + let Inst{55} = avo; + let Inst{54-50} = 0; + let Inst{49} = lf; + let Inst{48} = sf; + let Inst{47-43} = 0; + let Inst{42} = c2; + let Inst{41} = c1; + let Inst{40} = c0; + let Inst{39-0} = 0; } +//----------------------------------------------------------------------------- +// Section 5.5 RW Type +//----------------------------------------------------------------------------- + +//----------------------------------------------------------------------------- +// Section 5.6 RVM Type +//----------------------------------------------------------------------------- + +//----------------------------------------------------------------------------- +// Section 5.7 RV Type +//----------------------------------------------------------------------------- + // Pseudo instructions. 
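+// (For instance, the stack-extension and GETSTACKTOP pseudos are emitted with
+// this class and expanded after register allocation in
+// VEInstrInfo::expandPostRAPseudo.)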
-class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern=[]> +class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = []> : InstVE<outs, ins, asmstr, pattern> { let isCodeGenOnly = 1; let isPseudo = 1; diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp index bc382dcef7c35..86b2ac2078b1b 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.cpp +++ b/llvm/lib/Target/VE/VEInstrInfo.cpp @@ -12,6 +12,7 @@ #include "VEInstrInfo.h" #include "VE.h" +#include "VEMachineFunctionInfo.h" #include "VESubtarget.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -24,7 +25,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" -#define DEBUG_TYPE "ve" +#define DEBUG_TYPE "ve-instr-info" using namespace llvm; @@ -35,8 +36,441 @@ using namespace llvm; void VEInstrInfo::anchor() {} VEInstrInfo::VEInstrInfo(VESubtarget &ST) - : VEGenInstrInfo(VE::ADJCALLSTACKDOWN, VE::ADJCALLSTACKUP), RI(), - Subtarget(ST) {} + : VEGenInstrInfo(VE::ADJCALLSTACKDOWN, VE::ADJCALLSTACKUP), RI() {} + +static bool IsIntegerCC(unsigned CC) { return (CC < VECC::CC_AF); } + +static VECC::CondCode GetOppositeBranchCondition(VECC::CondCode CC) { + switch (CC) { + case VECC::CC_IG: + return VECC::CC_ILE; + case VECC::CC_IL: + return VECC::CC_IGE; + case VECC::CC_INE: + return VECC::CC_IEQ; + case VECC::CC_IEQ: + return VECC::CC_INE; + case VECC::CC_IGE: + return VECC::CC_IL; + case VECC::CC_ILE: + return VECC::CC_IG; + case VECC::CC_AF: + return VECC::CC_AT; + case VECC::CC_G: + return VECC::CC_LENAN; + case VECC::CC_L: + return VECC::CC_GENAN; + case VECC::CC_NE: + return VECC::CC_EQNAN; + case VECC::CC_EQ: + return VECC::CC_NENAN; + case VECC::CC_GE: + return VECC::CC_LNAN; + case VECC::CC_LE: + return VECC::CC_GNAN; + case VECC::CC_NUM: + return VECC::CC_NAN; + case VECC::CC_NAN: + return VECC::CC_NUM; + case VECC::CC_GNAN: + return VECC::CC_LE; + case VECC::CC_LNAN: + return VECC::CC_GE; + case VECC::CC_NENAN: + return VECC::CC_EQ; + case VECC::CC_EQNAN: + return VECC::CC_NE; + case VECC::CC_GENAN: + return VECC::CC_L; + case VECC::CC_LENAN: + return VECC::CC_G; + case VECC::CC_AT: + return VECC::CC_AF; + case VECC::UNKNOWN: + return VECC::UNKNOWN; + } + llvm_unreachable("Invalid cond code"); +} + +// Treat br.l [BRCF AT] as unconditional branch +static bool isUncondBranchOpcode(int Opc) { + return Opc == VE::BRCFLa || Opc == VE::BRCFWa || + Opc == VE::BRCFLa_nt || Opc == VE::BRCFWa_nt || + Opc == VE::BRCFLa_t || Opc == VE::BRCFWa_t || + Opc == VE::BRCFDa || Opc == VE::BRCFSa || + Opc == VE::BRCFDa_nt || Opc == VE::BRCFSa_nt || + Opc == VE::BRCFDa_t || Opc == VE::BRCFSa_t; +} + +static bool isCondBranchOpcode(int Opc) { + return Opc == VE::BRCFLrr || Opc == VE::BRCFLir || + Opc == VE::BRCFLrr_nt || Opc == VE::BRCFLir_nt || + Opc == VE::BRCFLrr_t || Opc == VE::BRCFLir_t || + Opc == VE::BRCFWrr || Opc == VE::BRCFWir || + Opc == VE::BRCFWrr_nt || Opc == VE::BRCFWir_nt || + Opc == VE::BRCFWrr_t || Opc == VE::BRCFWir_t || + Opc == VE::BRCFDrr || Opc == VE::BRCFDir || + Opc == VE::BRCFDrr_nt || Opc == VE::BRCFDir_nt || + Opc == VE::BRCFDrr_t || Opc == VE::BRCFDir_t || + Opc == VE::BRCFSrr || Opc == VE::BRCFSir || + Opc == VE::BRCFSrr_nt || Opc == VE::BRCFSir_nt || + Opc == VE::BRCFSrr_t || Opc == VE::BRCFSir_t; +} + +static bool isIndirectBranchOpcode(int Opc) { + return Opc == VE::BCFLari || Opc == VE::BCFLari || + Opc == VE::BCFLari_nt || Opc == VE::BCFLari_nt || + Opc == VE::BCFLari_t || Opc == VE::BCFLari_t || + Opc == VE::BCFLari || Opc 
== VE::BCFLari || + Opc == VE::BCFLari_nt || Opc == VE::BCFLari_nt || + Opc == VE::BCFLari_t || Opc == VE::BCFLari_t; +} + +static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, + SmallVectorImpl<MachineOperand> &Cond) { + Cond.push_back(MachineOperand::CreateImm(LastInst->getOperand(0).getImm())); + Cond.push_back(LastInst->getOperand(1)); + Cond.push_back(LastInst->getOperand(2)); + Target = LastInst->getOperand(3).getMBB(); +} + +bool VEInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) + return false; + + if (!isUnpredicatedTerminator(*I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = &*I; + unsigned LastOpc = LastInst->getOpcode(); + + // If there is only one terminator instruction, process it. + if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { + if (isUncondBranchOpcode(LastOpc)) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } + if (isCondBranchOpcode(LastOpc)) { + // Block ends with fall-through condbranch. + parseCondBranch(LastInst, TBB, Cond); + return false; + } + return true; // Can't handle indirect branch. + } + + // Get the instruction before it if it is a terminator. + MachineInstr *SecondLastInst = &*I; + unsigned SecondLastOpc = SecondLastInst->getOpcode(); + + // If AllowModify is true and the block ends with two or more unconditional + // branches, delete all but the first unconditional branch. + if (AllowModify && isUncondBranchOpcode(LastOpc)) { + while (isUncondBranchOpcode(SecondLastOpc)) { + LastInst->eraseFromParent(); + LastInst = SecondLastInst; + LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { + // Return now the only terminator is an unconditional branch. + TBB = LastInst->getOperand(0).getMBB(); + return false; + } + SecondLastInst = &*I; + SecondLastOpc = SecondLastInst->getOpcode(); + } + } + + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I)) + return true; + + // If the block ends with a B and a Bcc, handle it. + if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { + parseCondBranch(SecondLastInst, TBB, Cond); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with two unconditional branches, handle it. The second + // one is not executed. + if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + return false; + } + + // ...likewise if it ends with an indirect branch followed by an unconditional + // branch. + if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return true; + } + + // Otherwise, can't handle this. 
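+  // (Recap of the analyzeBranch contract: returning true means "could not
+  // analyze"; returning false with TBB/FBB/Cond filled in lets generic
+  // passes such as branch folding re-point or reverse this branch via
+  // insertBranch/removeBranch/reverseBranchCondition below.)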
+  return true;
+}
+
+unsigned VEInstrInfo::insertBranch(MachineBasicBlock &MBB,
+                                   MachineBasicBlock *TBB,
+                                   MachineBasicBlock *FBB,
+                                   ArrayRef<MachineOperand> Cond,
+                                   const DebugLoc &DL, int *BytesAdded) const {
+  assert(TBB && "insertBranch must not be told to insert a fallthrough");
+  assert((Cond.size() == 3 || Cond.size() == 0) &&
+         "VE branch conditions should have three components!");
+  assert(!BytesAdded && "code size not handled");
+  if (Cond.empty()) {
+    // Unconditional branch
+    assert(!FBB && "Unconditional branch with multiple successors!");
+    BuildMI(&MBB, DL, get(VE::BRCFLa_t))
+        .addMBB(TBB);
+    return 1;
+  }
+
+  // Conditional branch
+  //   (BRCFir CC sy sz addr)
+  assert(Cond[0].isImm() && Cond[2].isReg() && "not implemented");
+
+  unsigned opc[2];
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  MachineFunction *MF = MBB.getParent();
+  const MachineRegisterInfo &MRI = MF->getRegInfo();
+  unsigned Reg = Cond[2].getReg();
+  if (IsIntegerCC(Cond[0].getImm())) {
+    if (TRI->getRegSizeInBits(Reg, MRI) == 32) {
+      opc[0] = VE::BRCFWir;
+      opc[1] = VE::BRCFWrr;
+    } else {
+      opc[0] = VE::BRCFLir;
+      opc[1] = VE::BRCFLrr;
+    }
+  } else {
+    if (TRI->getRegSizeInBits(Reg, MRI) == 32) {
+      opc[0] = VE::BRCFSir;
+      opc[1] = VE::BRCFSrr;
+    } else {
+      opc[0] = VE::BRCFDir;
+      opc[1] = VE::BRCFDrr;
+    }
+  }
+  if (Cond[1].isImm()) {
+    BuildMI(&MBB, DL, get(opc[0]))
+        .add(Cond[0]) // condition code
+        .add(Cond[1]) // lhs
+        .add(Cond[2]) // rhs
+        .addMBB(TBB);
+  } else {
+    BuildMI(&MBB, DL, get(opc[1]))
+        .add(Cond[0])
+        .add(Cond[1])
+        .add(Cond[2])
+        .addMBB(TBB);
+  }
+
+  if (!FBB)
+    return 1;
+
+  BuildMI(&MBB, DL, get(VE::BRCFLa_t))
+      .addMBB(FBB);
+  return 2;
+}
+
+unsigned VEInstrInfo::removeBranch(MachineBasicBlock &MBB,
+                                   int *BytesRemoved) const {
+  assert(!BytesRemoved && "code size not handled");
+
+  MachineBasicBlock::iterator I = MBB.end();
+  unsigned Count = 0;
+  while (I != MBB.begin()) {
+    --I;
+
+    if (I->isDebugValue())
+      continue;
+
+    if (!isUncondBranchOpcode(I->getOpcode()) &&
+        !isCondBranchOpcode(I->getOpcode()))
+      break; // Not a branch
+
+    I->eraseFromParent();
+    I = MBB.end();
+    ++Count;
+  }
+  return Count;
+}
+
+bool VEInstrInfo::reverseBranchCondition(
+    SmallVectorImpl<MachineOperand> &Cond) const {
+  VECC::CondCode CC = static_cast<VECC::CondCode>(Cond[0].getImm());
+  Cond[0].setImm(GetOppositeBranchCondition(CC));
+  return false;
+}
+
+static bool IsAliasOfSX(Register Reg) {
+  return VE::I8RegClass.contains(Reg) || VE::I16RegClass.contains(Reg) ||
+         VE::I32RegClass.contains(Reg) || VE::I64RegClass.contains(Reg) ||
+         VE::F32RegClass.contains(Reg);
+}
+
+void VEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I, const DebugLoc &DL,
+                              MCRegister DestReg, MCRegister SrcReg,
+                              bool KillSrc) const {
+
+  if (IsAliasOfSX(SrcReg) && IsAliasOfSX(DestReg)) {
+    BuildMI(MBB, I, DL, get(VE::ORri), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc))
+        .addImm(0);
+  } else {
+    const TargetRegisterInfo *TRI = &getRegisterInfo();
+    dbgs() << "Impossible reg-to-reg copy from " << printReg(SrcReg, TRI)
+           << " to " << printReg(DestReg, TRI) << "\n";
+    llvm_unreachable("Impossible reg-to-reg copy");
+  }
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0.
This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. +unsigned VEInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, + int &FrameIndex) const { + if (MI.getOpcode() == VE::LDrii || // I64 + MI.getOpcode() == VE::LDLSXrii || // I32 + MI.getOpcode() == VE::LDUrii // F32 + ) { + if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && + MI.getOperand(2).getImm() == 0 && MI.getOperand(3).isImm() && + MI.getOperand(3).getImm() == 0) { + FrameIndex = MI.getOperand(1).getIndex(); + return MI.getOperand(0).getReg(); + } + } + return 0; +} + +/// isStoreToStackSlot - If the specified machine instruction is a direct +/// store to a stack slot, return the virtual or physical register number of +/// the source reg along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than storing to the stack slot. +unsigned VEInstrInfo::isStoreToStackSlot(const MachineInstr &MI, + int &FrameIndex) const { + if (MI.getOpcode() == VE::STrii || // I64 + MI.getOpcode() == VE::STLrii || // I32 + MI.getOpcode() == VE::STUrii // F32 + ) { + if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() && + MI.getOperand(1).getImm() == 0 && MI.getOperand(2).isImm() && + MI.getOperand(2).getImm() == 0) { + FrameIndex = MI.getOperand(0).getIndex(); + return MI.getOperand(3).getReg(); + } + } + return 0; +} + +void VEInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL; + if (I != MBB.end()) + DL = I->getDebugLoc(); + + MachineFunction *MF = MBB.getParent(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); + MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, + MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); + + // On the order of operands here: think "[FrameIdx + 0] = SrcReg". 
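+  // e.g. spilling a 64-bit register (register chosen arbitrarily for this
+  // sketch) to frame slot 1 yields:
+  //   STrii %stack.1, 0, 0, $sx7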
+ if (RC == &VE::I64RegClass) { + BuildMI(MBB, I, DL, get(VE::STrii)) + .addFrameIndex(FI) + .addImm(0) + .addImm(0) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO); + } else if (RC == &VE::I32RegClass) { + BuildMI(MBB, I, DL, get(VE::STLrii)) + .addFrameIndex(FI) + .addImm(0) + .addImm(0) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO); + } else if (RC == &VE::F32RegClass) { + BuildMI(MBB, I, DL, get(VE::STUrii)) + .addFrameIndex(FI) + .addImm(0) + .addImm(0) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO); + } else + report_fatal_error("Can't store this register to stack slot"); +} + +void VEInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register DestReg, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL; + if (I != MBB.end()) + DL = I->getDebugLoc(); + + MachineFunction *MF = MBB.getParent(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); + MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); + + if (RC == &VE::I64RegClass) { + BuildMI(MBB, I, DL, get(VE::LDrii), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addImm(0) + .addMemOperand(MMO); + } else if (RC == &VE::I32RegClass) { + BuildMI(MBB, I, DL, get(VE::LDLSXrii), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addImm(0) + .addMemOperand(MMO); + } else if (RC == &VE::F32RegClass) { + BuildMI(MBB, I, DL, get(VE::LDUrii), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addImm(0) + .addMemOperand(MMO); + } else + report_fatal_error("Can't load this register from stack slot"); +} + +Register VEInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { + VEMachineFunctionInfo *VEFI = MF->getInfo<VEMachineFunctionInfo>(); + Register GlobalBaseReg = VEFI->getGlobalBaseReg(); + if (GlobalBaseReg != 0) + return GlobalBaseReg; + + // We use %s15 (%got) as a global base register + GlobalBaseReg = VE::SX15; + + // Insert a pseudo instruction to set the GlobalBaseReg into the first + // MBB of the function + MachineBasicBlock &FirstMBB = MF->front(); + MachineBasicBlock::iterator MBBI = FirstMBB.begin(); + DebugLoc dl; + BuildMI(FirstMBB, MBBI, dl, get(VE::GETGOT), GlobalBaseReg); + VEFI->setGlobalBaseReg(GlobalBaseReg); + return GlobalBaseReg; +} bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { switch (MI.getOpcode()) { @@ -47,6 +481,9 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MI.eraseFromParent(); // The pseudo instruction is gone now. return true; } + case VE::GETSTACKTOP: { + return expandGetStackTopPseudo(MI); + } } return false; } @@ -54,8 +491,8 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { bool VEInstrInfo::expandExtendStackPseudo(MachineInstr &MI) const { MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); - const VEInstrInfo &TII = - *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo()); + const VESubtarget &STI = MF.getSubtarget<VESubtarget>(); + const VEInstrInfo &TII = *STI.getInstrInfo(); DebugLoc dl = MBB.findDebugLoc(MI); // Create following instructions and multiple basic blocks. @@ -91,7 +528,7 @@ bool VEInstrInfo::expandExtendStackPseudo(MachineInstr &MI) const { // Next, add the true and fallthrough blocks as its successors. 
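+  // (For context, an editorial sketch of the protocol as reflected by the
+  // code below: the branch above skips the grow request when %sp is still
+  // above the stack limit %sl; otherwise the request ID 0x13b and the current
+  // %sl/%sp are written into the system communication area so the OS can
+  // extend the stack.)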
   BB->addSuccessor(syscallMBB);
   BB->addSuccessor(sinkMBB);
-  BuildMI(BB, dl, TII.get(VE::BCRLrr))
+  BuildMI(BB, dl, TII.get(VE::BRCFLrr_t))
       .addImm(VECC::CC_IGE)
       .addReg(VE::SX11) // %sp
       .addReg(VE::SX8)  // %sl
@@ -102,23 +539,26 @@
   // Update machine-CFG edges
   BB->addSuccessor(sinkMBB);
 
-  BuildMI(BB, dl, TII.get(VE::LDSri), VE::SX61)
+  BuildMI(BB, dl, TII.get(VE::LDrii), VE::SX61)
       .addReg(VE::SX14)
+      .addImm(0)
       .addImm(0x18);
   BuildMI(BB, dl, TII.get(VE::ORri), VE::SX62)
       .addReg(VE::SX0)
       .addImm(0);
-  BuildMI(BB, dl, TII.get(VE::LEAzzi), VE::SX63)
+  BuildMI(BB, dl, TII.get(VE::LEAzii), VE::SX63)
+      .addImm(0)
+      .addImm(0)
       .addImm(0x13b);
-  BuildMI(BB, dl, TII.get(VE::SHMri))
+  BuildMI(BB, dl, TII.get(VE::SHMLri))
       .addReg(VE::SX61)
      .addImm(0)
       .addReg(VE::SX63);
-  BuildMI(BB, dl, TII.get(VE::SHMri))
+  BuildMI(BB, dl, TII.get(VE::SHMLri))
       .addReg(VE::SX61)
       .addImm(8)
       .addReg(VE::SX8);
-  BuildMI(BB, dl, TII.get(VE::SHMri))
+  BuildMI(BB, dl, TII.get(VE::SHMLri))
       .addReg(VE::SX61)
       .addImm(16)
       .addReg(VE::SX11);
@@ -131,3 +571,35 @@ bool VEInstrInfo::expandExtendStackPseudo(MachineInstr &MI) const {
   MI.eraseFromParent(); // The pseudo instruction is gone now.
   return true;
 }
+
+bool VEInstrInfo::expandGetStackTopPseudo(MachineInstr &MI) const {
+  MachineBasicBlock *MBB = MI.getParent();
+  MachineFunction &MF = *MBB->getParent();
+  const VESubtarget &STI = MF.getSubtarget<VESubtarget>();
+  const VEInstrInfo &TII = *STI.getInstrInfo();
+  DebugLoc DL = MBB->findDebugLoc(MI);
+
+  // Create the following instruction:
+  //
+  //   dst = %sp + target-specific frame + the size of the parameter area
+
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const VEFrameLowering &TFL = *STI.getFrameLowering();
+
+  // The VE ABI requires a reserved 176-byte area at the top of the stack
+  // as described in VESubtarget.cpp, so we adjust for it here.
+  unsigned NumBytes = STI.getAdjustedFrameSize(0);
+
+  // Also add the size of the parameter area.
+  if (MFI.adjustsStack() && TFL.hasReservedCallFrame(MF))
+    NumBytes += MFI.getMaxCallFrameSize();
+
+  BuildMI(*MBB, MI, DL, TII.get(VE::LEArii))
+      .addDef(MI.getOperand(0).getReg())
+      .addReg(VE::SX11)
+      .addImm(0)
+      .addImm(NumBytes);
+
+  MI.eraseFromParent(); // The pseudo instruction is gone now.
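+  // e.g. (sketch) with no outgoing call frame this becomes roughly:
+  //   LEArii %dst, %s11, 0, 176
+  // where 176 is the ABI-reserved area included by getAdjustedFrameSize(0)
+  // (the exact constant is frame- and alignment-dependent).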
+ return true; +} diff --git a/llvm/lib/Target/VE/VEInstrInfo.h b/llvm/lib/Target/VE/VEInstrInfo.h index 6a26d0e952750..7b6662df1d605 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.h +++ b/llvm/lib/Target/VE/VEInstrInfo.h @@ -25,7 +25,6 @@ class VESubtarget; class VEInstrInfo : public VEGenInstrInfo { const VERegisterInfo RI; - const VESubtarget &Subtarget; virtual void anchor(); public: @@ -37,10 +36,52 @@ public: /// const VERegisterInfo &getRegisterInfo() const { return RI; } + /// Branch Analysis & Modification { + bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify = false) const override; + + unsigned removeBranch(MachineBasicBlock &MBB, + int *BytesRemoved = nullptr) const override; + + unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, + const DebugLoc &DL, + int *BytesAdded = nullptr) const override; + + bool + reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; + /// } Branch Analysis & Modification + + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, + bool KillSrc) const override; + + /// Stack Spill & Reload { + unsigned isLoadFromStackSlot(const MachineInstr &MI, + int &FrameIndex) const override; + unsigned isStoreToStackSlot(const MachineInstr &MI, + int &FrameIndex) const override; + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, Register SrcReg, + bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, Register DestReg, + int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + /// } Stack Spill & Reload + + Register getGlobalBaseReg(MachineFunction *MF) const; + // Lower pseudo instructions after register allocation. bool expandPostRAPseudo(MachineInstr &MI) const override; bool expandExtendStackPseudo(MachineInstr &MI) const; + bool expandGetStackTopPseudo(MachineInstr &MI) const; }; } // namespace llvm diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td index dc671aaa3f8d2..8500f8ef1292d 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.td +++ b/llvm/lib/Target/VE/VEInstrInfo.td @@ -17,6 +17,94 @@ include "VEInstrFormats.td" //===----------------------------------------------------------------------===// +// Helper functions to retrieve target constants. +// +// VE instructions have a space to hold following immediates +// $sy has 7 bits to represent simm7, uimm7, simm7fp, or uimm7fp. +// $sz also has 7 bits to represent mimm or mimmfp. +// $disp has 32 bits to represent simm32. +// +// The mimm is a special immediate value of sequential bit stream of 0 or 1. +// `(m)0`: Represents 0 sequence then 1 sequence like 0b00...0011...11, +// where `m` is equal to the number of leading zeros. +// `(m)1`: Represents 1 sequence then 0 sequence like 0b11...1100...00, +// where `m` is equal to the number of leading ones. +// Each bit of mimm's 7 bits is used like below: +// bit 6 : If `(m)0`, this bit is 1. Otherwise, this bit is 0. +// bit 5-0: Represents the m (0-63). +// Use `!add(m, 64)` to generates an immediate value in pattern matchings. +// +// The floating point immediate value is not something like compacted value. 
+// It is simple integer representation, so it works rarely. +// e.g. 0.0 (0x00000000) or -2.0 (0xC0000000=(2)1). +//===----------------------------------------------------------------------===// + +def ULO7 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() & 0x7f, + SDLoc(N), MVT::i32); +}]>; +def LO7 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(SignExtend32(N->getSExtValue(), 7), + SDLoc(N), MVT::i32); +}]>; +def MIMM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(convMImmVal(getImmVal(N)), + SDLoc(N), MVT::i32); +}]>; +def LO32 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(Lo_32(N->getZExtValue()), + SDLoc(N), MVT::i32); +}]>; +def HI32 : SDNodeXForm<imm, [{ + // Transformation function: shift the immediate value down into the low bits. + return CurDAG->getTargetConstant(Hi_32(N->getZExtValue()), + SDLoc(N), MVT::i32); +}]>; + +def LO7FP : SDNodeXForm<fpimm, [{ + uint64_t Val = getFpImmVal(N); + return CurDAG->getTargetConstant(SignExtend32(Val, 7), SDLoc(N), MVT::i32); +}]>; +def MIMMFP : SDNodeXForm<fpimm, [{ + return CurDAG->getTargetConstant(convMImmVal(getFpImmVal(N)), + SDLoc(N), MVT::i32); +}]>; +def LOFP32 : SDNodeXForm<fpimm, [{ + return CurDAG->getTargetConstant(Lo_32(getFpImmVal(N) & 0xffffffff), + SDLoc(N), MVT::i32); +}]>; +def HIFP32 : SDNodeXForm<fpimm, [{ + return CurDAG->getTargetConstant(Hi_32(getFpImmVal(N)), SDLoc(N), MVT::i32); +}]>; + +def icond2cc : SDNodeXForm<cond, [{ + VECC::CondCode VECC = intCondCode2Icc(N->get()); + return CurDAG->getTargetConstant(VECC, SDLoc(N), MVT::i32); +}]>; + +def icond2ccSwap : SDNodeXForm<cond, [{ + ISD::CondCode CC = getSetCCSwappedOperands(N->get()); + VECC::CondCode VECC = intCondCode2Icc(CC); + return CurDAG->getTargetConstant(VECC, SDLoc(N), MVT::i32); +}]>; + +def fcond2cc : SDNodeXForm<cond, [{ + VECC::CondCode VECC = fpCondCode2Fcc(N->get()); + return CurDAG->getTargetConstant(VECC, SDLoc(N), MVT::i32); +}]>; + +def fcond2ccSwap : SDNodeXForm<cond, [{ + ISD::CondCode CC = getSetCCSwappedOperands(N->get()); + VECC::CondCode VECC = fpCondCode2Fcc(CC); + return CurDAG->getTargetConstant(VECC, SDLoc(N), MVT::i32); +}]>; + +def CCOP : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue(), + SDLoc(N), MVT::i32); +}]>; + +//===----------------------------------------------------------------------===// // Feature predicates. //===----------------------------------------------------------------------===// @@ -24,42 +112,302 @@ include "VEInstrFormats.td" // Instruction Pattern Stuff //===----------------------------------------------------------------------===// -def simm7 : PatLeaf<(imm), [{ return isInt<7>(N->getSExtValue()); }]>; +// zero +def ZeroAsmOperand : AsmOperandClass { + let Name = "Zero"; +} +def zero : Operand<i32>, PatLeaf<(imm), [{ + return N->getSExtValue() == 0; }]> { + let ParserMatchClass = ZeroAsmOperand; +} + +// uimm0to2 - Special immediate value represents 0, 1, and 2. +def UImm0to2AsmOperand : AsmOperandClass { + let Name = "UImm0to2"; +} +def uimm0to2 : Operand<i32>, PatLeaf<(imm), [{ + return N->getZExtValue() < 3; }], ULO7> { + let ParserMatchClass = UImm0to2AsmOperand; +} + +// uimm1 - Generic immediate value. +def UImm1AsmOperand : AsmOperandClass { + let Name = "UImm1"; +} +def uimm1 : Operand<i32>, PatLeaf<(imm), [{ + return isUInt<1>(N->getZExtValue()); }], ULO7> { + let ParserMatchClass = UImm1AsmOperand; +} + +// uimm2 - Generic immediate value. 
+def UImm2AsmOperand : AsmOperandClass { + let Name = "UImm2"; +} +def uimm2 : Operand<i32>, PatLeaf<(imm), [{ + return isUInt<2>(N->getZExtValue()); }], ULO7> { + let ParserMatchClass = UImm2AsmOperand; +} + +// uimm3 - Generic immediate value. +def UImm3AsmOperand : AsmOperandClass { + let Name = "UImm3"; +} +def uimm3 : Operand<i32>, PatLeaf<(imm), [{ + return isUInt<3>(N->getZExtValue()); }], ULO7> { + let ParserMatchClass = UImm3AsmOperand; +} + +// uimm6 - Generic immediate value. +def UImm6AsmOperand : AsmOperandClass { + let Name = "UImm6"; +} +def uimm6 : Operand<i32>, PatLeaf<(imm), [{ + return isUInt<6>(N->getZExtValue()); }], ULO7> { + let ParserMatchClass = UImm6AsmOperand; +} + +// uimm7 - Generic immediate value. +def UImm7AsmOperand : AsmOperandClass { + let Name = "UImm7"; +} +def uimm7 : Operand<i32>, PatLeaf<(imm), [{ + return isUInt<7>(N->getZExtValue()); }], ULO7> { + let ParserMatchClass = UImm7AsmOperand; +} + +// simm7 - Generic immediate value. +def SImm7AsmOperand : AsmOperandClass { + let Name = "SImm7"; +} +def simm7 : Operand<i32>, PatLeaf<(imm), [{ + return isInt<7>(N->getSExtValue()); }], LO7> { + let ParserMatchClass = SImm7AsmOperand; + let DecoderMethod = "DecodeSIMM7"; +} + +// mimm - Special immediate value of sequential bit stream of 0 or 1. +def MImmAsmOperand : AsmOperandClass { + let Name = "MImm"; + let ParserMethod = "parseMImmOperand"; +} +def mimm : Operand<i32>, PatLeaf<(imm), [{ + return isMImmVal(getImmVal(N)); }], MIMM> { + let ParserMatchClass = MImmAsmOperand; + let PrintMethod = "printMImmOperand"; +} + +// simm7fp - Generic fp immediate value. +def simm7fp : Operand<i32>, PatLeaf<(fpimm), [{ + return isInt<7>(getFpImmVal(N)); + }], LO7FP> { + let ParserMatchClass = SImm7AsmOperand; + let DecoderMethod = "DecodeSIMM7"; +} + +// mimmfp - Special fp immediate value of sequential bit stream of 0 or 1. +def mimmfp : Operand<i32>, PatLeaf<(fpimm), [{ + return isMImmVal(getFpImmVal(N)); }], MIMMFP> { + let ParserMatchClass = MImmAsmOperand; + let PrintMethod = "printMImmOperand"; +} + +// mimmfp32 - 32 bit width mimmfp +// Float value places at higher bits, so ignore lower 32 bits. +def mimmfp32 : Operand<i32>, PatLeaf<(fpimm), [{ + return isMImm32Val(getFpImmVal(N) >> 32); }], MIMMFP> { + let ParserMatchClass = MImmAsmOperand; + let PrintMethod = "printMImmOperand"; +} + +// other generic patterns to use in pattern matchings def simm32 : PatLeaf<(imm), [{ return isInt<32>(N->getSExtValue()); }]>; -def uimm6 : PatLeaf<(imm), [{ return isUInt<6>(N->getZExtValue()); }]>; +def uimm32 : PatLeaf<(imm), [{ return isUInt<32>(N->getZExtValue()); }]>; +def lomsbzero : PatLeaf<(imm), [{ return (N->getZExtValue() & 0x80000000) + == 0; }]>; +def lozero : PatLeaf<(imm), [{ return (N->getZExtValue() & 0xffffffff) + == 0; }]>; +def fplomsbzero : PatLeaf<(fpimm), [{ return (getFpImmVal(N) & 0x80000000) + == 0; }]>; +def fplozero : PatLeaf<(fpimm), [{ return (getFpImmVal(N) & 0xffffffff) + == 0; }]>; + +def CCSIOp : PatLeaf<(cond), [{ + switch (N->get()) { + default: return true; + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETUGT: + case ISD::SETUGE: return false; + } +}]>; + +def CCUIOp : PatLeaf<(cond), [{ + switch (N->get()) { + default: return true; + case ISD::SETLT: + case ISD::SETLE: + case ISD::SETGT: + case ISD::SETGE: return false; + } +}]>; -// ASX format of memory address -def MEMri : Operand<iPTR> { +//===----------------------------------------------------------------------===// +// Addressing modes. +// SX-Aurora has following fields. 
+// sz: register or 0 +// sy: register or immediate (-64 to 63) +// disp: immediate (-2147483648 to 2147483647) +// +// There are two kinds of instruction. +// ASX format uses sz + sy + disp. +// AS format uses sz + disp. +// +// Moreover, there are four kinds of assembly instruction format. +// ASX format uses "disp", "disp(, sz)", "disp(sy)", "disp(sy, sz)", +// "(, sz)", "(sy)", or "(sy, sz)". +// AS format uses "disp", "disp(, sz)", or "(, sz)" in general. +// AS format in RRM format uses "disp", "disp(sz)", or "(sz)". +// AS format in RRM format for host memory access uses "sz", "(sz)", +// or "disp(sz)". +// +// We defined them below. +// +// ASX format: +// MEMrri, MEMrii, MEMzri, MEMzii +// AS format: +// MEMriASX, MEMziASX : simple AS format +// MEMriRRM, MEMziRRM : AS format in RRM format +// MEMriHM, MEMziHM : AS format in RRM format for host memory access +//===----------------------------------------------------------------------===// + +// DAG selections for both ASX and AS formats. +def ADDRrri : ComplexPattern<iPTR, 3, "selectADDRrri", [frameindex], []>; +def ADDRrii : ComplexPattern<iPTR, 3, "selectADDRrii", [frameindex], []>; +def ADDRzri : ComplexPattern<iPTR, 3, "selectADDRzri", [], []>; +def ADDRzii : ComplexPattern<iPTR, 3, "selectADDRzii", [], []>; +def ADDRri : ComplexPattern<iPTR, 2, "selectADDRri", [frameindex], []>; +def ADDRzi : ComplexPattern<iPTR, 2, "selectADDRzi", [], []>; + +// ASX format. +def VEMEMrriAsmOperand : AsmOperandClass { + let Name = "MEMrri"; + let ParserMethod = "parseMEMOperand"; +} +def VEMEMriiAsmOperand : AsmOperandClass { + let Name = "MEMrii"; + let ParserMethod = "parseMEMOperand"; +} +def VEMEMzriAsmOperand : AsmOperandClass { + let Name = "MEMzri"; + let ParserMethod = "parseMEMOperand"; +} +def VEMEMziiAsmOperand : AsmOperandClass { + let Name = "MEMzii"; + let ParserMethod = "parseMEMOperand"; +} + +// ASX format uses single assembly instruction format. +def MEMrri : Operand<iPTR> { + let PrintMethod = "printMemASXOperand"; + let MIOperandInfo = (ops ptr_rc, ptr_rc, i32imm); + let ParserMatchClass = VEMEMrriAsmOperand; +} +def MEMrii : Operand<iPTR> { let PrintMethod = "printMemASXOperand"; - let MIOperandInfo = (ops ptr_rc, i64imm); + let MIOperandInfo = (ops ptr_rc, i32imm, i32imm); + let ParserMatchClass = VEMEMriiAsmOperand; +} +def MEMzri : Operand<iPTR> { + let PrintMethod = "printMemASXOperand"; + let MIOperandInfo = (ops i32imm /* = 0 */, ptr_rc, i32imm); + let ParserMatchClass = VEMEMzriAsmOperand; +} +def MEMzii : Operand<iPTR> { + let PrintMethod = "printMemASXOperand"; + let MIOperandInfo = (ops i32imm /* = 0 */, i32imm, i32imm); + let ParserMatchClass = VEMEMziiAsmOperand; } -// AS format of memory address -def MEMASri : Operand<iPTR> { - let PrintMethod = "printMemASOperand"; - let MIOperandInfo = (ops ptr_rc, i64imm); +// AS format. +def VEMEMriAsmOperand : AsmOperandClass { + let Name = "MEMri"; + let ParserMethod = "parseMEMAsOperand"; +} +def VEMEMziAsmOperand : AsmOperandClass { + let Name = "MEMzi"; + let ParserMethod = "parseMEMAsOperand"; } -// Branch targets have OtherVT type. -def brtarget32 : Operand<OtherVT> { - let EncoderMethod = "getBranchTarget32OpValue"; +// AS format uses multiple assembly instruction formats +// 1. 
AS generic assembly instruction format: +def MEMriASX : Operand<iPTR> { + let PrintMethod = "printMemASOperandASX"; + let MIOperandInfo = (ops ptr_rc, i32imm); + let ParserMatchClass = VEMEMriAsmOperand; +} +def MEMziASX : Operand<iPTR> { + let PrintMethod = "printMemASOperandASX"; + let MIOperandInfo = (ops i32imm /* = 0 */, i32imm); + let ParserMatchClass = VEMEMziAsmOperand; } -def simm7Op64 : Operand<i64> { - let DecoderMethod = "DecodeSIMM7"; +// 2. AS RRM style assembly instruction format: +def MEMriRRM : Operand<iPTR> { + let PrintMethod = "printMemASOperandRRM"; + let MIOperandInfo = (ops ptr_rc, i32imm); + let ParserMatchClass = VEMEMriAsmOperand; +} +def MEMziRRM : Operand<iPTR> { + let PrintMethod = "printMemASOperandRRM"; + let MIOperandInfo = (ops i32imm /* = 0 */, i32imm); + let ParserMatchClass = VEMEMziAsmOperand; } -def simm32Op64 : Operand<i64> { - let DecoderMethod = "DecodeSIMM32"; +// 3. AS HM style assembly instruction format: +def MEMriHM : Operand<iPTR> { + let PrintMethod = "printMemASOperandHM"; + let MIOperandInfo = (ops ptr_rc, i32imm); + let ParserMatchClass = VEMEMriAsmOperand; +} +def MEMziHM : Operand<iPTR> { + let PrintMethod = "printMemASOperandHM"; + let MIOperandInfo = (ops i32imm /* = 0 */, i32imm); + let ParserMatchClass = VEMEMziAsmOperand; } -def uimm6Op64 : Operand<i64> { - let DecoderMethod = "DecodeUIMM6"; +//===----------------------------------------------------------------------===// +// Other operands. +//===----------------------------------------------------------------------===// + +// Branch targets have OtherVT type. +def brtarget32 : Operand<OtherVT> { + let EncoderMethod = "getBranchTargetOpValue"; + let DecoderMethod = "DecodeSIMM32"; } // Operand for printing out a condition code. -let PrintMethod = "printCCOperand" in - def CCOp : Operand<i32>; +def CCOpAsmOperand : AsmOperandClass { let Name = "CCOp"; } +def CCOp : Operand<i32>, ImmLeaf<i32, [{ + return Imm >= 0 && Imm < 22; }], CCOP> { + let PrintMethod = "printCCOperand"; + let DecoderMethod = "DecodeCCOperand"; + let EncoderMethod = "getCCOpValue"; + let ParserMatchClass = CCOpAsmOperand; +} + +// Operand for a rounding mode code. +def RDOpAsmOperand : AsmOperandClass { + let Name = "RDOp"; +} +def RDOp : Operand<i32> { + let PrintMethod = "printRDOperand"; + let DecoderMethod = "DecodeRDOperand"; + let EncoderMethod = "getRDOpValue"; + let ParserMatchClass = RDOpAsmOperand; +} + +def VEhi : SDNode<"VEISD::Hi", SDTIntUnaryOp>; +def VElo : SDNode<"VEISD::Lo", SDTIntUnaryOp>; // These are target-independent nodes, but have target-specific formats. 
def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i64>, @@ -72,10 +420,29 @@ def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart, def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -// def SDT_SPCall : SDTypeProfile<0, -1, [SDTCisVT<0, i64>]>; +def SDT_SPCall : SDTypeProfile<0, -1, [SDTCisVT<0, i64>]>; +def call : SDNode<"VEISD::CALL", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def retflag : SDNode<"VEISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +def getGOT : Operand<iPTR>; + +// GETFUNPLT for PIC +def GetFunPLT : SDNode<"VEISD::GETFUNPLT", SDTIntUnaryOp>; + +// GETTLSADDR for TLS +def GetTLSAddr : SDNode<"VEISD::GETTLSADDR", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; + +// GETSTACKTOP +def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone, + [SDNPHasChain, SDNPSideEffect]>; + + //===----------------------------------------------------------------------===// // VE Flag Conditions //===----------------------------------------------------------------------===// @@ -107,168 +474,1243 @@ def CC_LENAN : CC_VAL<20>; // Less or Equal or NaN def CC_AT : CC_VAL<21>; // Always true //===----------------------------------------------------------------------===// +// VE Rounding Mode +//===----------------------------------------------------------------------===// + +// Note that these values must be kept in sync with the VERD::RoundingMode enum +// values. +class RD_VAL<int N> : PatLeaf<(i32 N)>; +def RD_NONE : RD_VAL< 0>; // According to PSW +def RD_RZ : RD_VAL< 8>; // Round toward Zero +def RD_RP : RD_VAL< 9>; // Round toward Plus infinity +def RD_RM : RD_VAL<10>; // Round toward Minus infinity +def RD_RN : RD_VAL<11>; // Round to Nearest (ties to Even) +def RD_RA : RD_VAL<12>; // Round to Nearest (ties to Away) + +//===----------------------------------------------------------------------===// // VE Multiclasses for common instruction formats //===----------------------------------------------------------------------===// -multiclass RMm<string opcStr, bits<8>opc, - RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> { - def rri : RM< - opc, (outs RC:$sx), (ins RC:$sy, RC:$sz, immOp2:$imm32), - !strconcat(opcStr, " $sx, ${imm32}($sy, ${sz})")> { - let cy = 1; - let cz = 1; - let hasSideEffects = 0; +// Multiclass for generic RR type instructions +let hasSideEffects = 0 in +multiclass RRbm<string opcStr, bits<8>opc, + RegisterClass RCo, ValueType Tyo, + RegisterClass RCi, ValueType Tyi, + SDPatternOperator OpNode = null_frag, + Operand immOp = simm7, Operand mOp = mimm> { + def rr : RR<opc, (outs RCo:$sx), (ins RCi:$sy, RCi:$sz), + !strconcat(opcStr, " $sx, $sy, $sz"), + [(set Tyo:$sx, (OpNode Tyi:$sy, Tyi:$sz))]>; + // VE calculates (OpNode $sy, $sz), but llvm requires to have immediate + // in RHS, so we use following definition. 
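+  // e.g. (sketch) for a word add: (add i32:$sz, (i32 3)) selects the "ri"
+  // form below and prints as "<opc> $sx, 3, $sz", with the immediate
+  // occupying the $sy slot.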
+ let cy = 0 in + def ri : RR<opc, (outs RCo:$sx), (ins RCi:$sz, immOp:$sy), + !strconcat(opcStr, " $sx, $sy, $sz"), + [(set Tyo:$sx, (OpNode Tyi:$sz, (Tyi immOp:$sy)))]>; + let cz = 0 in + def rm : RR<opc, (outs RCo:$sx), (ins RCi:$sy, mOp:$sz), + !strconcat(opcStr, " $sx, $sy, $sz"), + [(set Tyo:$sx, (OpNode Tyi:$sy, (Tyi mOp:$sz)))]>; + let cy = 0, cz = 0 in + def im : RR<opc, (outs RCo:$sx), (ins immOp:$sy, mOp:$sz), + !strconcat(opcStr, " $sx, $sy, $sz"), + [(set Tyo:$sx, (OpNode (Tyi immOp:$sy), (Tyi mOp:$sz)))]>; +} + +// Multiclass for non-commutative RR type instructions +let hasSideEffects = 0 in +multiclass RRNCbm<string opcStr, bits<8>opc, + RegisterClass RCo, ValueType Tyo, + RegisterClass RCi, ValueType Tyi, + SDPatternOperator OpNode = null_frag, + Operand immOp = simm7, Operand mOp = mimm> { + def rr : RR<opc, (outs RCo:$sx), (ins RCi:$sy, RCi:$sz), + !strconcat(opcStr, " $sx, $sy, $sz"), + [(set Tyo:$sx, (OpNode Tyi:$sy, Tyi:$sz))]>; + let cy = 0 in + def ir : RR<opc, (outs RCo:$sx), (ins immOp:$sy, RCi:$sz), + !strconcat(opcStr, " $sx, $sy, $sz"), + [(set Tyo:$sx, (OpNode (Tyi immOp:$sy), Tyi:$sz))]>; + let cz = 0 in + def rm : RR<opc, (outs RCo:$sx), (ins RCi:$sy, mOp:$sz), + !strconcat(opcStr, " $sx, $sy, $sz"), + [(set Tyo:$sx, (OpNode Tyi:$sy, (Tyi mOp:$sz)))]>; + let cy = 0, cz = 0 in + def im : RR<opc, (outs RCo:$sx), (ins immOp:$sy, mOp:$sz), + !strconcat(opcStr, " $sx, $sy, $sz"), + [(set Tyo:$sx, (OpNode (Tyi immOp:$sy), (Tyi mOp:$sz)))]>; +} + +// Generic RR multiclass with 2 arguments. +// e.g. ADDUL, ADDSWSX, ADDSWZX, and etc. +multiclass RRm<string opcStr, bits<8>opc, + RegisterClass RC, ValueType Ty, + SDPatternOperator OpNode = null_frag, + Operand immOp = simm7, Operand mOp = mimm> : + RRbm<opcStr, opc, RC, Ty, RC, Ty, OpNode, immOp, mOp>; + +// Generic RR multiclass for non-commutative instructions with 2 arguments. +// e.g. SUBUL, SUBUW, SUBSWSX, and etc. +multiclass RRNCm<string opcStr, bits<8>opc, + RegisterClass RC, ValueType Ty, + SDPatternOperator OpNode = null_frag, + Operand immOp = simm7, Operand mOp = mimm> : + RRNCbm<opcStr, opc, RC, Ty, RC, Ty, OpNode, immOp, mOp>; + +// Generic RR multiclass for floating point instructions with 2 arguments. +// e.g. FADDD, FADDS, FSUBD, and etc. +multiclass RRFm<string opcStr, bits<8>opc, + RegisterClass RC, ValueType Ty, + SDPatternOperator OpNode = null_frag, + Operand immOp = simm7fp, Operand mOp = mimmfp> : + RRNCbm<opcStr, opc, RC, Ty, RC, Ty, OpNode, immOp, mOp>; + +// Generic RR multiclass for shift instructions with 2 arguments. +// e.g. SLL, SRL, SLAWSX, and etc. +let hasSideEffects = 0 in +multiclass RRIm<string opcStr, bits<8>opc, + RegisterClass RC, ValueType Ty, + SDPatternOperator OpNode = null_frag> { + def rr : RR<opc, (outs RC:$sx), (ins RC:$sz, I32:$sy), + !strconcat(opcStr, " $sx, $sz, $sy"), + [(set Ty:$sx, (OpNode Ty:$sz, i32:$sy))]>; + let cz = 0 in + def mr : RR<opc, (outs RC:$sx), (ins mimm:$sz, I32:$sy), + !strconcat(opcStr, " $sx, $sz, $sy"), + [(set Ty:$sx, (OpNode (Ty mimm:$sz), i32:$sy))]>; + let cy = 0 in + def ri : RR<opc, (outs RC:$sx), (ins RC:$sz, uimm7:$sy), + !strconcat(opcStr, " $sx, $sz, $sy"), + [(set Ty:$sx, (OpNode Ty:$sz, (i32 uimm7:$sy)))]>; + let cy = 0, cz = 0 in + def mi : RR<opc, (outs RC:$sx), (ins mimm:$sz, uimm7:$sy), + !strconcat(opcStr, " $sx, $sz, $sy"), + [(set Ty:$sx, (OpNode (Ty mimm:$sz), (i32 uimm7:$sy)))]>; +} + +// Special RR multiclass for 128 bits shift left instruction. +// e.g. 
+let Constraints = "$hi = $sx", DisableEncoding = "$hi", hasSideEffects = 0 in
+multiclass RRILDm<string opcStr, bits<8>opc,
+                  RegisterClass RC, ValueType Ty,
+                  SDPatternOperator OpNode = null_frag> {
+  def rrr : RR<opc, (outs RC:$sx), (ins RC:$hi, RC:$sz, I32:$sy),
+               !strconcat(opcStr, " $sx, $sz, $sy")>;
+  let cz = 0 in
+  def rmr : RR<opc, (outs RC:$sx), (ins RC:$hi, mimm:$sz, I32:$sy),
+               !strconcat(opcStr, " $sx, $sz, $sy")>;
+  let cy = 0 in
+  def rri : RR<opc, (outs RC:$sx), (ins RC:$hi, RC:$sz, uimm7:$sy),
+               !strconcat(opcStr, " $sx, $sz, $sy")>;
+  let cy = 0, cz = 0 in
+  def rmi : RR<opc, (outs RC:$sx), (ins RC:$hi, mimm:$sz, uimm7:$sy),
+               !strconcat(opcStr, " $sx, $sz, $sy")>;
+}
+
+// Special RR multiclass for 128-bit shift right instruction.
+// e.g. SRD
+let Constraints = "$low = $sx", DisableEncoding = "$low", hasSideEffects = 0 in
+multiclass RRIRDm<string opcStr, bits<8>opc,
+                  RegisterClass RC, ValueType Ty,
+                  SDPatternOperator OpNode = null_frag> {
+  def rrr : RR<opc, (outs RC:$sx), (ins RC:$sz, RC:$low, I32:$sy),
+               !strconcat(opcStr, " $sx, $sz, $sy")>;
+  let cz = 0 in
+  def mrr : RR<opc, (outs RC:$sx), (ins mimm:$sz, RC:$low, I32:$sy),
+               !strconcat(opcStr, " $sx, $sz, $sy")>;
+  let cy = 0 in
+  def rri : RR<opc, (outs RC:$sx), (ins RC:$sz, RC:$low, uimm7:$sy),
+               !strconcat(opcStr, " $sx, $sz, $sy")>;
+  let cy = 0, cz = 0 in
+  def mri : RR<opc, (outs RC:$sx), (ins mimm:$sz, RC:$low, uimm7:$sy),
+               !strconcat(opcStr, " $sx, $sz, $sy")>;
+}
+
+// Generic RR multiclass with one argument.
+// e.g. LDZ, PCNT, and BRV
+let cy = 0, sy = 0, hasSideEffects = 0 in
+multiclass RRI1m<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty,
+                 SDPatternOperator OpNode = null_frag> {
+  def r : RR<opc, (outs RC:$sx), (ins RC:$sz), !strconcat(opcStr, " $sx, $sz"),
+             [(set Ty:$sx, (OpNode Ty:$sz))]>;
+  let cz = 0 in
+  def m : RR<opc, (outs RC:$sx), (ins mimm:$sz),
+             !strconcat(opcStr, " $sx, $sz"),
+             [(set Ty:$sx, (OpNode (Ty mimm:$sz)))]>;
+}
+
+// Special RR multiclass for MRG instruction.
+// e.g. MRG
+let Constraints = "$sx = $sd", DisableEncoding = "$sd", hasSideEffects = 0 in
+multiclass RRMRGm<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty> {
+  def rr : RR<opc, (outs RC:$sx), (ins RC:$sy, RC:$sz, RC:$sd),
+              !strconcat(opcStr, " $sx, $sy, $sz")>;
+  let cy = 0 in
+  def ir : RR<opc, (outs RC:$sx), (ins simm7:$sy, RC:$sz, RC:$sd),
+              !strconcat(opcStr, " $sx, $sy, $sz")>;
+  let cz = 0 in
+  def rm : RR<opc, (outs RC:$sx), (ins RC:$sy, mimm:$sz, RC:$sd),
+              !strconcat(opcStr, " $sx, $sy, $sz")>;
+  let cy = 0, cz = 0 in
+  def im : RR<opc, (outs RC:$sx), (ins simm7:$sy, mimm:$sz, RC:$sd),
+              !strconcat(opcStr, " $sx, $sy, $sz")>;
+}
+
+// Special RR multiclass for BSWP instruction.
+// e.g. BSWP
+let hasSideEffects = 0 in
+multiclass RRSWPm<string opcStr, bits<8>opc,
+                  RegisterClass RC, ValueType Ty,
+                  SDPatternOperator OpNode = null_frag> {
+  let cy = 0 in
+  def ri : RR<opc, (outs RC:$sx), (ins RC:$sz, uimm1:$sy),
+              !strconcat(opcStr, " $sx, $sz, $sy"),
+              [(set Ty:$sx, (OpNode Ty:$sz, (i32 uimm1:$sy)))]>;
+  let cy = 0, cz = 0 in
+  def mi : RR<opc, (outs RC:$sx), (ins mimm:$sz, uimm1:$sy),
+              !strconcat(opcStr, " $sx, $sz, $sy"),
+              [(set Ty:$sx, (OpNode (Ty mimm:$sz), (i32 uimm1:$sy)))]>;
+}
+
+// Multiclass for CMOV instructions.
+// e.g. CMOVL, CMOVW, CMOVD, etc.
+let Constraints = "$sx = $sd", DisableEncoding = "$sd", hasSideEffects = 0,
+    cfw = ? in
+multiclass RRCMOVm<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty> {
+  def rr : RR<opc, (outs I64:$sx), (ins CCOp:$cfw, RC:$sy, I64:$sz, I64:$sd),
+              !strconcat(opcStr, " $sx, $sz, $sy")>;
+  let cy = 0 in
+  def ir : RR<opc, (outs I64:$sx),
+              (ins CCOp:$cfw, simm7:$sy, I64:$sz, I64:$sd),
+              !strconcat(opcStr, " $sx, $sz, $sy")>;
+  let cz = 0 in
+  def rm : RR<opc, (outs I64:$sx),
+              (ins CCOp:$cfw, RC:$sy, mimm:$sz, I64:$sd),
+              !strconcat(opcStr, " $sx, $sz, $sy")>;
+  let cy = 0, cz = 0 in
+  def im : RR<opc, (outs I64:$sx),
+              (ins CCOp:$cfw, simm7:$sy, mimm:$sz, I64:$sd),
+              !strconcat(opcStr, " $sx, $sz, $sy")>;
+}
+
+// Multiclass for floating point conversion instructions.
+// e.g. CVTWDSX, CVTWDZX, CVTWSSX, etc.
+// sz{3-0} = rounding mode
+let cz = 0, hasSideEffects = 0 in
+multiclass CVTRDm<string opcStr, bits<8> opc, RegisterClass RCo, ValueType Tyo,
+                  RegisterClass RCi, ValueType Tyi> {
+  def r : RR<opc, (outs RCo:$sx), (ins RDOp:$rd, RCi:$sy),
+             !strconcat(opcStr, "${rd} $sx, $sy")> {
+    bits<4> rd;
+    let sz{5-4} = 0;
+    let sz{3-0} = rd;
   }
-  def zzi : RM<
-      opc, (outs RC:$sx), (ins immOp2:$imm32),
-      !strconcat(opcStr, " $sx, $imm32")> {
-    let cy = 0;
-    let sy = 0;
-    let cz = 0;
-    let sz = 0;
-    let hasSideEffects = 0;
+  let cy = 0 in
+  def i : RR<opc, (outs RCo:$sx), (ins RDOp:$rd, simm7:$sy),
+             !strconcat(opcStr, "${rd} $sx, $sy")> {
+    bits<4> rd;
+    let sz{5-4} = 0;
+    let sz{3-0} = rd;
   }
 }
 
-// Multiclass for RR type instructions
+// Multiclass for floating point conversion instructions.
+// e.g. CVTDW, CVTSW, CVTDL, etc.
+let cz = 0, sz = 0, hasSideEffects = 0 in
+multiclass CVTm<string opcStr, bits<8> opc, RegisterClass RCo, ValueType Tyo,
+                RegisterClass RCi, ValueType Tyi,
+                SDPatternOperator OpNode = null_frag> {
+  def r : RR<opc, (outs RCo:$sx), (ins RCi:$sy),
+             !strconcat(opcStr, " $sx, $sy"),
+             [(set Tyo:$sx, (OpNode Tyi:$sy))]>;
+  let cy = 0 in
+  def i : RR<opc, (outs RCo:$sx), (ins simm7:$sy),
+             !strconcat(opcStr, " $sx, $sy")>;
+}
 
-multiclass RRmrr<string opcStr, bits<8>opc,
-                 RegisterClass RCo, ValueType Tyo,
-                 RegisterClass RCi, ValueType Tyi> {
-  def rr : RR<opc, (outs RCo:$sx), (ins RCi:$sy, RCi:$sz),
-              !strconcat(opcStr, " $sx, $sy, $sz")>
-           { let cy = 1; let cz = 1; let hasSideEffects = 0; }
+// Multiclass for PFCH instructions.
+// e.g. PFCH
+let sx = 0, hasSideEffects = 0 in
+multiclass PFCHm<string opcStr, bits<8>opc> {
+  def rri : RM<opc, (outs), (ins MEMrri:$addr), !strconcat(opcStr, " $addr"),
+               [(prefetch ADDRrri:$addr, imm, imm, (i32 1))]>;
+  let cy = 0 in
+  def rii : RM<opc, (outs), (ins MEMrii:$addr), !strconcat(opcStr, " $addr"),
+               [(prefetch ADDRrii:$addr, imm, imm, (i32 1))]>;
+  let cz = 0 in
+  def zri : RM<opc, (outs), (ins MEMzri:$addr), !strconcat(opcStr, " $addr"),
+               [(prefetch ADDRzri:$addr, imm, imm, (i32 1))]>;
+  let cy = 0, cz = 0 in
+  def zii : RM<opc, (outs), (ins MEMzii:$addr), !strconcat(opcStr, " $addr"),
+               [(prefetch ADDRzii:$addr, imm, imm, (i32 1))]>;
 }
 
-multiclass RRmri<string opcStr, bits<8>opc,
-                 RegisterClass RCo, ValueType Tyo,
-                 RegisterClass RCi, ValueType Tyi, Operand immOp> {
-  // VE calculates (OpNode $sy, $sz), but llvm requires to have immediate
-  // in RHS, so we use following definition.
-  def ri : RR<opc, (outs RCo:$sx), (ins RCi:$sz, immOp:$sy),
-              !strconcat(opcStr, " $sx, $sy, $sz")>
-           { let cy = 0; let cz = 1; let hasSideEffects = 0; }
+// Multiclass for CAS instructions.
+// e.g. TS1AML, TS1AMW, TS2AM, etc.
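+// A structural note (sketch; exact semantics assumed, not normative): the
+// "$dest = $sd" tie below means a single register is both the value supplied
+// to the atomic and the place where the old memory contents are returned,
+// so a compiler-emitted compare-and-swap might look like
+//   cas.l %s2, (%s3), %s1   ; %s2 in: new value, out: old value (assumed)
+// mayLoad/mayStore together mark the read-modify-write on memory.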
+let Constraints = "$dest = $sd", DisableEncoding = "$sd",
+    mayStore = 1, mayLoad = 1, hasSideEffects = 0 in
+multiclass RRCAStgm<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty,
+                    Operand immOp, Operand MEM, Operand ADDR,
+                    SDPatternOperator OpNode = null_frag> {
+  def r : RRM<opc, (outs RC:$dest), (ins MEM:$addr, RC:$sy, RC:$sd),
+              !strconcat(opcStr, " $dest, $addr, $sy"),
+              [(set Ty:$dest, (OpNode ADDR:$addr, Ty:$sy, Ty:$sd))]>;
+  let cy = 0 in
+  def i : RRM<opc, (outs RC:$dest), (ins MEM:$addr, immOp:$sy, RC:$sd),
+              !strconcat(opcStr, " $dest, $addr, $sy"),
+              [(set Ty:$dest, (OpNode ADDR:$addr, (Ty immOp:$sy), Ty:$sd))]>;
+}
+multiclass RRCASm<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty,
+                  Operand immOp, SDPatternOperator OpNode = null_frag> {
+  defm ri : RRCAStgm<opcStr, opc, RC, Ty, immOp, MEMriRRM, ADDRri, OpNode>;
+  let cz = 0 in
+  defm zi : RRCAStgm<opcStr, opc, RC, Ty, immOp, MEMziRRM, ADDRzi, OpNode>;
 }
 
-multiclass RRmiz<string opcStr, bits<8>opc,
-                 RegisterClass RCo, ValueType Tyo,
-                 RegisterClass RCi, ValueType Tyi, Operand immOp> {
-  def zi : RR<opc, (outs RCo:$sx), (ins immOp:$sy),
-              !strconcat(opcStr, " $sx, $sy")>
-           { let cy = 0; let cz = 0; let sz = 0; let hasSideEffects = 0; }
+// Multiclass for branch instructions
+// e.g. BCFL, BCFW, BCFD, etc.
+let isBranch = 1, isTerminator = 1, isIndirectBranch = 1, hasSideEffects = 0 in
+multiclass BCbpfm<string opcStr, string cmpStr, bits<8> opc, dag cond,
+                  Operand ADDR> {
+  let bpf = 0 /* NONE */ in
+  def "" : CF<opc, (outs), !con(cond, (ins ADDR:$addr)),
+              !strconcat(opcStr, " ", cmpStr, "$addr")>;
+  let bpf = 2 /* NOT TAKEN */ in
+  def _nt : CF<opc, (outs), !con(cond, (ins ADDR:$addr)),
+               !strconcat(opcStr, ".nt ", cmpStr, "$addr")>;
+  let bpf = 3 /* TAKEN */ in
+  def _t : CF<opc, (outs), !con(cond, (ins ADDR:$addr)),
+              !strconcat(opcStr, ".t ", cmpStr, "$addr")>;
+}
+multiclass BCtgm<string opcStr, string cmpStr, bits<8> opc, dag cond> {
+  defm ri : BCbpfm<opcStr, cmpStr, opc, cond, MEMriASX>;
+  let cz = 0 in defm zi : BCbpfm<opcStr, cmpStr, opc, cond, MEMziASX>;
+}
+multiclass BCm<string opcStr, string opcStrAt, string opcStrAf, bits<8> opc,
+               RegisterClass RC, Operand immOp> {
+  let DecoderMethod = "DecodeBranchCondition" in
+  defm r : BCtgm<opcStr, "$comp, ", opc, (ins CCOp:$cond, RC:$comp)>;
+  let DecoderMethod = "DecodeBranchCondition", cy = 0 in
+  defm i : BCtgm<opcStr, "$comp, ", opc, (ins CCOp:$cond, immOp:$comp)>;
+  let DecoderMethod = "DecodeBranchConditionAlways", cy = 0, sy = 0,
+      cf = 15 /* AT */, isBarrier = 1 in
+  defm a : BCtgm<opcStrAt, "", opc, (ins)>;
+  let DecoderMethod = "DecodeBranchConditionAlways", cy = 0, sy = 0,
+      cf = 0 /* AF */ in
+  defm na : BCtgm<opcStrAf, "", opc, (ins)>;
 }
 
-multiclass RRNDmrm<string opcStr, bits<8>opc,
-                   RegisterClass RCo, ValueType Tyo,
-                   RegisterClass RCi, ValueType Tyi, Operand immOp2> {
-  def rm0 : RR<opc, (outs RCo:$sx), (ins RCi:$sy, immOp2:$sz),
-               !strconcat(opcStr, " $sx, $sy, (${sz})0")> {
-    let cy = 1;
-    let cz = 0;
-    let sz{6} = 1;
-    // (guess) tblgen conservatively assumes hasSideEffects when
-    // it fails to infer from a pattern.
-    let hasSideEffects = 0;
-  }
+// Multiclass for relative branch instructions
+// e.g. BRCFL, BRCFW, BRCFD, etc.
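+// The bpf field below carries a static branch-prediction hint; for one
+// condition this yields three spellings (illustrative asm, operands assumed):
+//   brgt.l    %s0, %s1, .LBB0_2   ; bpf = 0, no hint
+//   brgt.l.nt %s0, %s1, .LBB0_2   ; bpf = 2, predicted not taken
+//   brgt.l.t  %s0, %s1, .LBB0_2   ; bpf = 3, predicted taken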
+let isBranch = 1, isTerminator = 1, hasSideEffects = 0 in
+multiclass BCRbpfm<string opcStr, string cmpStr, bits<8> opc, dag cond> {
+  let bpf = 0 /* NONE */ in
+  def "" : CF<opc, (outs), !con(cond, (ins brtarget32:$imm32)),
+              !strconcat(opcStr, " ", cmpStr, "$imm32")>;
+  let bpf = 2 /* NOT TAKEN */ in
+  def _nt : CF<opc, (outs), !con(cond, (ins brtarget32:$imm32)),
+               !strconcat(opcStr, ".nt ", cmpStr, "$imm32")>;
+  let bpf = 3 /* TAKEN */ in
+  def _t : CF<opc, (outs), !con(cond, (ins brtarget32:$imm32)),
+              !strconcat(opcStr, ".t ", cmpStr, "$imm32")>;
+}
+multiclass BCRm<string opcStr, string opcStrAt, string opcStrAf, bits<8> opc,
+                RegisterClass RC, Operand immOp> {
+  defm rr : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, RC:$sy, RC:$sz)>;
+  let cy = 0 in
+  defm ir : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, immOp:$sy, RC:$sz)>;
+  let cy = 0, sy = 0, cz = 0, sz = 0, cf = 15 /* AT */, isBarrier = 1 in
+  defm a : BCRbpfm<opcStrAt, "", opc, (ins)>;
+  let cy = 0, sy = 0, cz = 0, sz = 0, cf = 0 /* AF */ in
+  defm na : BCRbpfm<opcStrAf, "", opc, (ins)>;
 }
 
-// Used by add, mul, div, and similar commutative instructions
-// The order of operands are "$sx, $sy, $sz"
+// Multiclass for communication register instructions.
+// e.g. LCR
+let hasSideEffects = 1 in
+multiclass LOADCRm<string opcStr, bits<8>opc, RegisterClass RC> {
+  def rr : RR<opc, (outs RC:$sx), (ins RC:$sz, RC:$sy),
+              !strconcat(opcStr, " $sx, $sy, $sz")>;
+  let cy = 0 in def ri : RR<opc, (outs RC:$sx), (ins RC:$sz, simm7:$sy),
+                            !strconcat(opcStr, " $sx, $sy, $sz")>;
+  let cz = 0 in def zr : RR<opc, (outs RC:$sx), (ins zero:$sz, RC:$sy),
+                            !strconcat(opcStr, " $sx, $sy, $sz")>;
+  let cy = 0, cz = 0 in
+  def zi : RR<opc, (outs RC:$sx), (ins zero:$sz, simm7:$sy),
+              !strconcat(opcStr, " $sx, $sy, $sz")>;
+}
 
-multiclass RRm<string opcStr, bits<8>opc,
-               RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> :
-  RRmrr<opcStr, opc, RC, Ty, RC, Ty>,
-  RRmri<opcStr, opc, RC, Ty, RC, Ty, immOp>,
-  RRmiz<opcStr, opc, RC, Ty, RC, Ty, immOp>,
-  RRNDmrm<opcStr, opc, RC, Ty, RC, Ty, immOp2>;
-
-// Branch multiclass
-let isBranch = 1, isTerminator = 1, hasDelaySlot = 1 in
-multiclass BCRm<string opcStr, string opcStrAt, bits<8> opc,
-                RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> {
-  def rr : CF<
-      opc, (outs),
-      (ins CCOp:$cf, RC:$sy, RC:$sz, brtarget32:$imm32),
-      !strconcat(opcStr, " $sy, $sz, $imm32")> {
-    let cy = 1;
-    let cz = 1;
-    let hasSideEffects = 0;
-  }
+// Multiclass for communication register instructions.
+// e.g. SCR
+let hasSideEffects = 1 in
+multiclass STORECRm<string opcStr, bits<8>opc, RegisterClass RC> {
+  def rr : RR<opc, (outs), (ins RC:$sz, RC:$sy, RC:$sx),
+              !strconcat(opcStr, " $sx, $sy, $sz")>;
+  let cy = 0 in def ri : RR<opc, (outs), (ins RC:$sz, simm7:$sy, RC:$sx),
+                            !strconcat(opcStr, " $sx, $sy, $sz")>;
+  let cz = 0 in def zr : RR<opc, (outs), (ins zero:$sz, RC:$sy, RC:$sx),
+                            !strconcat(opcStr, " $sx, $sy, $sz")>;
+  let cy = 0, cz = 0 in
+  def zi : RR<opc, (outs), (ins zero:$sz, simm7:$sy, RC:$sx),
+              !strconcat(opcStr, " $sx, $sy, $sz")>;
 }
 
+// Multiclass for communication register instructions.
+// e.g. FIDCR
+let cz = 0, hasSideEffects = 1 in
+multiclass FIDCRm<string opcStr, bits<8>opc, RegisterClass RC> {
+  def ri : RR<opc, (outs RC:$sx), (ins RC:$sy, uimm3:$sz),
+              !strconcat(opcStr, " $sx, $sy, $sz")>;
+  let cy = 0 in def ii : RR<opc, (outs RC:$sx), (ins simm7:$sy, uimm3:$sz),
+                            !strconcat(opcStr, " $sx, $sy, $sz")>;
+}
+
+// Multiclass for LHM instruction.
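+// Usage sketch (illustrative; operand spelling assumed): LHM/SHM take a
+// simple disp(base) address, e.g.
+//   lhm.l %s0, 8(%s1)   ; load  8 bytes from host memory
+//   shm.l %s0, 8(%s1)   ; store 8 bytes to host memory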
+let mayLoad = 1, hasSideEffects = 0 in
+multiclass LHMm<string opcStr, bits<8> opc, RegisterClass RC> {
+  def ri : RRMHM<opc, (outs RC:$dest), (ins MEMriHM:$addr),
+                 !strconcat(opcStr, " $dest, $addr")>;
+  let cz = 0 in
+  def zi : RRMHM<opc, (outs RC:$dest), (ins MEMziHM:$addr),
+                 !strconcat(opcStr, " $dest, $addr")>;
+}
+
+// Multiclass for SHM instruction.
+let mayStore = 1, hasSideEffects = 0 in
+multiclass SHMm<string opcStr, bits<8> opc, RegisterClass RC> {
+  def ri : RRMHM<opc, (outs), (ins MEMriHM:$addr, RC:$sx),
+                 !strconcat(opcStr, " $sx, $addr")>;
+  let cz = 0 in
+  def zi : RRMHM<opc, (outs), (ins MEMziHM:$addr, RC:$sx),
+                 !strconcat(opcStr, " $sx, $addr")>;
+}
 
 //===----------------------------------------------------------------------===//
 // Instructions
+//
+// Define all scalar instructions defined in the SX-Aurora TSUBASA Architecture
+// Guide here. For the mnemonics, we use those defined in the Vector Engine
+// Assembly Language Reference Manual.
 //===----------------------------------------------------------------------===//
 
-// LEA and LEASL instruction (load 32 bit imm to low or high part)
-let cx = 0 in
-defm LEA : RMm<"lea", 0x06, I64, i64, simm7Op64, simm32Op64>;
+//-----------------------------------------------------------------------------
+// Section 8.2 - Load/Store instructions
+//-----------------------------------------------------------------------------
+
+// Multiclass for generic RM instructions
+multiclass RMm<string opcStr, bits<8>opc, RegisterClass RC> {
+  def rri : RM<opc, (outs RC:$dest), (ins MEMrri:$addr),
+               !strconcat(opcStr, " $dest, $addr"), []>;
+  let cy = 0 in
+  def rii : RM<opc, (outs RC:$dest), (ins MEMrii:$addr),
+               !strconcat(opcStr, " $dest, $addr"), []>;
+  let cz = 0 in
+  def zri : RM<opc, (outs RC:$dest), (ins MEMzri:$addr),
+               !strconcat(opcStr, " $dest, $addr"), []>;
+  let cy = 0, cz = 0 in
+  def zii : RM<opc, (outs RC:$dest), (ins MEMzii:$addr),
+               !strconcat(opcStr, " $dest, $addr"), []>;
+}
+
+// Section 8.2.1 - LEA
+let cx = 0, DecoderMethod = "DecodeLoadI64" in
+defm LEA : RMm<"lea", 0x06, I64>;
+let cx = 1, DecoderMethod = "DecodeLoadI64" in
+defm LEASL : RMm<"lea.sl", 0x06, I64>;
+let cx = 0, DecoderMethod = "DecodeLoadI32", isCodeGenOnly = 1 in
+defm LEA32 : RMm<"lea", 0x06, I32>;
+
+def : Pat<(iPTR ADDRrri:$addr), (LEArri MEMrri:$addr)>;
+def : Pat<(iPTR ADDRrii:$addr), (LEArii MEMrii:$addr)>;
+def : Pat<(add I64:$base, simm32:$disp), (LEArii $base, 0, (LO32 $disp))>;
+def : Pat<(add I64:$base, lozero:$disp), (LEASLrii $base, 0, (HI32 $disp))>;
+def : Pat<(add I32:$base, simm32:$disp),
+          (LEA32rii (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $base, sub_i32), 0,
+                    (LO32 $disp))>;
+
+def lea_add : PatFrags<(ops node:$base, node:$idx, node:$disp),
+                       [(add (add node:$base, node:$idx), node:$disp),
+                        (add (add node:$base, node:$disp), node:$idx)]>;
+def : Pat<(lea_add I64:$base, simm7:$idx, simm32:$disp),
+          (LEArii $base, (LO7 $idx), (LO32 $disp))>;
+def : Pat<(lea_add I64:$base, I64:$idx, simm32:$disp),
+          (LEArri $base, $idx, (LO32 $disp))>;
+def : Pat<(lea_add I64:$base, simm7:$idx, lozero:$disp),
+          (LEASLrii $base, (LO7 $idx), (HI32 $disp))>;
+def : Pat<(lea_add I64:$base, I64:$idx, lozero:$disp),
+          (LEASLrri $base, $idx, (HI32 $disp))>;
+
+// Multiclass for load instructions.
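+// Record-suffix convention used by LOADm/STOREm below (reference note;
+// printed operand forms approximate): the first letter picks the base
+// (r = register, z = zero), the second the index (r = register, i = 7-bit
+// immediate), and the trailing "i" is the 32-bit displacement:
+//   LDrri  ld $dest, disp($index, $base)
+//   LDrii  ld $dest, disp(imm7, $base)    ; cy = 0
+//   LDzri  ld $dest, disp($index)         ; cz = 0
+//   LDzii  ld $dest, disp(imm7)           ; cy = 0, cz = 0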
+let mayLoad = 1, hasSideEffects = 0 in +multiclass LOADm<string opcStr, bits<8> opc, RegisterClass RC, ValueType Ty, + SDPatternOperator OpNode = null_frag> { + def rri : RM<opc, (outs RC:$dest), (ins MEMrri:$addr), + !strconcat(opcStr, " $dest, $addr"), + [(set Ty:$dest, (OpNode ADDRrri:$addr))]>; + let cy = 0 in + def rii : RM<opc, (outs RC:$dest), (ins MEMrii:$addr), + !strconcat(opcStr, " $dest, $addr"), + [(set Ty:$dest, (OpNode ADDRrii:$addr))]>; + let cz = 0 in + def zri : RM<opc, (outs RC:$dest), (ins MEMzri:$addr), + !strconcat(opcStr, " $dest, $addr"), + [(set Ty:$dest, (OpNode ADDRzri:$addr))]>; + let cy = 0, cz = 0 in + def zii : RM<opc, (outs RC:$dest), (ins MEMzii:$addr), + !strconcat(opcStr, " $dest, $addr"), + [(set Ty:$dest, (OpNode ADDRzii:$addr))]>; +} + +// Section 8.2.2 - LDS +let DecoderMethod = "DecodeLoadI64" in +defm LD : LOADm<"ld", 0x01, I64, i64, load>; +def : Pat<(f64 (load ADDRrri:$addr)), (LDrri MEMrri:$addr)>; +def : Pat<(f64 (load ADDRrii:$addr)), (LDrii MEMrii:$addr)>; +def : Pat<(f64 (load ADDRzri:$addr)), (LDzri MEMzri:$addr)>; +def : Pat<(f64 (load ADDRzii:$addr)), (LDzii MEMzii:$addr)>; + +// Section 8.2.3 - LDU +let DecoderMethod = "DecodeLoadF32" in +defm LDU : LOADm<"ldu", 0x02, F32, f32, load>; + +// Section 8.2.4 - LDL +let DecoderMethod = "DecodeLoadI32" in +defm LDLSX : LOADm<"ldl.sx", 0x03, I32, i32, load>; +let cx = 1, DecoderMethod = "DecodeLoadI32" in +defm LDLZX : LOADm<"ldl.zx", 0x03, I32, i32, load>; + +// Section 8.2.5 - LD2B +let DecoderMethod = "DecodeLoadI32" in +defm LD2BSX : LOADm<"ld2b.sx", 0x04, I32, i32, sextloadi16>; +let cx = 1, DecoderMethod = "DecodeLoadI32" in +defm LD2BZX : LOADm<"ld2b.zx", 0x04, I32, i32, zextloadi16>; + +// Section 8.2.6 - LD1B +let DecoderMethod = "DecodeLoadI32" in +defm LD1BSX : LOADm<"ld1b.sx", 0x05, I32, i32, sextloadi8>; +let cx = 1, DecoderMethod = "DecodeLoadI32" in +defm LD1BZX : LOADm<"ld1b.zx", 0x05, I32, i32, zextloadi8>; + +// Multiclass for store instructions. 
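+// For reference (illustrative asm, registers assumed): the truncating
+// stores defined below pair with the extending loads above, e.g.
+//   ld2b.sx %s0, (, %s1)   ; sign-extending 16-bit load
+//   st2b    %s0, (, %s1)   ; truncating 16-bit store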
+let mayStore = 1 in +multiclass STOREm<string opcStr, bits<8> opc, RegisterClass RC, ValueType Ty, + SDPatternOperator OpNode = null_frag> { + def rri : RM<opc, (outs), (ins MEMrri:$addr, RC:$sx), + !strconcat(opcStr, " $sx, $addr"), + [(OpNode Ty:$sx, ADDRrri:$addr)]>; + let cy = 0 in + def rii : RM<opc, (outs), (ins MEMrii:$addr, RC:$sx), + !strconcat(opcStr, " $sx, $addr"), + [(OpNode Ty:$sx, ADDRrii:$addr)]>; + let cz = 0 in + def zri : RM<opc, (outs), (ins MEMzri:$addr, RC:$sx), + !strconcat(opcStr, " $sx, $addr"), + [(OpNode Ty:$sx, ADDRzri:$addr)]>; + let cy = 0, cz = 0 in + def zii : RM<opc, (outs), (ins MEMzii:$addr, RC:$sx), + !strconcat(opcStr, " $sx, $addr"), + [(OpNode Ty:$sx, ADDRzii:$addr)]>; +} + +// Section 8.2.7 - STS +let DecoderMethod = "DecodeStoreI64" in +defm ST : STOREm<"st", 0x11, I64, i64, store>; +def : Pat<(store f64:$src, ADDRrri:$addr), (STrri MEMrri:$addr, $src)>; +def : Pat<(store f64:$src, ADDRrii:$addr), (STrii MEMrii:$addr, $src)>; +def : Pat<(store f64:$src, ADDRzri:$addr), (STzri MEMzri:$addr, $src)>; +def : Pat<(store f64:$src, ADDRzii:$addr), (STzii MEMzii:$addr, $src)>; + +// Section 8.2.8 - STU +let DecoderMethod = "DecodeStoreF32" in +defm STU : STOREm<"stu", 0x12, F32, f32, store>; + +// Section 8.2.9 - STL +let DecoderMethod = "DecodeStoreI32" in +defm STL : STOREm<"stl", 0x13, I32, i32, store>; + +// Section 8.2.10 - ST2B +let DecoderMethod = "DecodeStoreI32" in +defm ST2B : STOREm<"st2b", 0x14, I32, i32, truncstorei16>; + +// Section 8.2.11 - ST1B +let DecoderMethod = "DecodeStoreI32" in +defm ST1B : STOREm<"st1b", 0x15, I32, i32, truncstorei8>; + +// Section 8.2.12 - DLDS +let DecoderMethod = "DecodeLoadI64" in +defm DLD : LOADm<"dld", 0x09, I64, i64, load>; + +// Section 8.2.13 - DLDU +let DecoderMethod = "DecodeLoadF32" in +defm DLDU : LOADm<"dldu", 0x0a, F32, f32, load>; + +// Section 8.2.14 - DLDL +let DecoderMethod = "DecodeLoadI32" in +defm DLDLSX : LOADm<"dldl.sx", 0x0b, I32, i32, load>; +let cx = 1, DecoderMethod = "DecodeLoadI32" in +defm DLDLZX : LOADm<"dldl.zx", 0x0b, I32, i32, load>; + +// Section 8.2.15 - PFCH +let DecoderMethod = "DecodeASX" in +defm PFCH : PFCHm<"pfch", 0x0c>; + +// Section 8.2.16 - TS1AM (Test and Set 1 AM) +let DecoderMethod = "DecodeTS1AMI64" in +defm TS1AML : RRCASm<"ts1am.l", 0x42, I64, i64, uimm7>; +let DecoderMethod = "DecodeTS1AMI32", cx = 1 in +defm TS1AMW : RRCASm<"ts1am.w", 0x42, I32, i32, uimm7>; + +// Section 8.2.17 - TS2AM (Test and Set 2 AM) +let DecoderMethod = "DecodeTS1AMI64" in +defm TS2AM : RRCASm<"ts2am", 0x43, I64, i64, uimm7>; + +// Section 8.2.18 - TS3AM (Test and Set 3 AM) +let DecoderMethod = "DecodeTS1AMI64" in +defm TS3AM : RRCASm<"ts3am", 0x52, I64, i64, uimm1>; + +// Section 8.2.19 - ATMAM (Atomic AM) +let DecoderMethod = "DecodeTS1AMI64" in +defm ATMAM : RRCASm<"atmam", 0x53, I64, i64, uimm0to2>; + +// Section 8.2.20 - CAS (Compare and Swap) +let DecoderMethod = "DecodeCASI64" in +defm CASL : RRCASm<"cas.l", 0x62, I64, i64, simm7>; +let DecoderMethod = "DecodeCASI32", cx = 1 in +defm CASW : RRCASm<"cas.w", 0x62, I32, i32, simm7>; + +//----------------------------------------------------------------------------- +// Section 8.3 - Transfer Control Instructions +//----------------------------------------------------------------------------- + +// Section 8.3.1 - FENCE (Fence) +let hasSideEffects = 1 in { + let avo = 1 in def FENCEI : RRFENCE<0x20, (outs), (ins), "fencei">; + def FENCEM : RRFENCE<0x20, (outs), (ins uimm2:$kind), "fencem $kind"> { + bits<2> kind; + let lf = kind{1}; + 
let sf = kind{0}; + } + def FENCEC : RRFENCE<0x20, (outs), (ins uimm3:$kind), "fencec $kind"> { + bits<3> kind; + let c2 = kind{2}; + let c1 = kind{1}; + let c0 = kind{0}; + } +} + +// Section 8.3.2 - SVOB (Set Vector Out-of-order memory access Boundary) +let sx = 0, cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 1 in +def SVOB : RR<0x30, (outs), (ins), "svob">; + +//----------------------------------------------------------------------------- +// Section 8.4 - Fixed-point Operation Instructions +//----------------------------------------------------------------------------- + +// Section 8.4.1 - ADD (Add) +defm ADDUL : RRm<"addu.l", 0x48, I64, i64>; +let cx = 1 in defm ADDUW : RRm<"addu.w", 0x48, I32, i32>; + +// Section 8.4.2 - ADS (Add Single) +defm ADDSWSX : RRm<"adds.w.sx", 0x4A, I32, i32, add>; +let cx = 1 in defm ADDSWZX : RRm<"adds.w.zx", 0x4A, I32, i32>; + +// Section 8.4.3 - ADX (Add) +defm ADDSL : RRm<"adds.l", 0x59, I64, i64, add>; + +// Section 8.4.4 - SUB (Subtract) +defm SUBUL : RRNCm<"subu.l", 0x58, I64, i64>; +let cx = 1 in defm SUBUW : RRNCm<"subu.w", 0x58, I32, i32>; + +// Section 8.4.5 - SBS (Subtract Single) +defm SUBSWSX : RRNCm<"subs.w.sx", 0x5A, I32, i32, sub>; +let cx = 1 in defm SUBSWZX : RRNCm<"subs.w.zx", 0x5A, I32, i32>; + +// Section 8.4.6 - SBX (Subtract) +defm SUBSL : RRNCm<"subs.l", 0x5B, I64, i64, sub>; + +// Section 8.4.7 - MPY (Multiply) +defm MULUL : RRm<"mulu.l", 0x49, I64, i64>; +let cx = 1 in defm MULUW : RRm<"mulu.w", 0x49, I32, i32>; + +// Section 8.4.8 - MPS (Multiply Single) +defm MULSWSX : RRm<"muls.w.sx", 0x4B, I32, i32, mul>; +let cx = 1 in defm MULSWZX : RRm<"muls.w.zx", 0x4B, I32, i32>; + +// Section 8.4.9 - MPX (Multiply) +defm MULSL : RRm<"muls.l", 0x6E, I64, i64, mul>; + +// Section 8.4.10 - MPD (Multiply) +defm MULSLW : RRbm<"muls.l.w", 0x6B, I64, i64, I32, i32>; + +// Section 8.4.11 - DIV (Divide) +defm DIVUL : RRNCm<"divu.l", 0x6F, I64, i64, udiv>; +let cx = 1 in defm DIVUW : RRNCm<"divu.w", 0x6F, I32, i32, udiv>; + +// Section 8.4.12 - DVS (Divide Single) +defm DIVSWSX : RRNCm<"divs.w.sx", 0x7B, I32, i32, sdiv>; +let cx = 1 in defm DIVSWZX : RRNCm<"divs.w.zx", 0x7B, I32, i32>; + +// Section 8.4.13 - DVX (Divide) +defm DIVSL : RRNCm<"divs.l", 0x7F, I64, i64, sdiv>; + +// Section 8.4.14 - CMP (Compare) +defm CMPUL : RRNCm<"cmpu.l", 0x55, I64, i64>; +let cx = 1 in defm CMPUW : RRNCm<"cmpu.w", 0x55, I32, i32>; + +// Section 8.4.15 - CPS (Compare Single) +defm CMPSWSX : RRNCm<"cmps.w.sx", 0x7A, I32, i32>; +let cx = 1 in defm CMPSWZX : RRNCm<"cmps.w.zx", 0x7A, I32, i32>; + +// Section 8.4.16 - CPX (Compare) +defm CMPSL : RRNCm<"cmps.l", 0x6A, I64, i64>; + +// Section 8.4.17 - CMS (Compare and Select Maximum/Minimum Single) +// cx: sx/zx, cw: max/min +defm MAXSWSX : RRm<"maxs.w.sx", 0x78, I32, i32>; +let cx = 1 in defm MAXSWZX : RRm<"maxs.w.zx", 0x78, I32, i32>; +let cw = 1 in defm MINSWSX : RRm<"mins.w.sx", 0x78, I32, i32>; +let cx = 1, cw = 1 in defm MINSWZX : RRm<"mins.w.zx", 0x78, I32, i32>; + +// Section 8.4.18 - CMX (Compare and Select Maximum/Minimum) +defm MAXSL : RRm<"maxs.l", 0x68, I64, i64>; +let cw = 1 in defm MINSL : RRm<"mins.l", 0x68, I64, i64>; + +//----------------------------------------------------------------------------- +// Section 8.5 - Logical Operation Instructions +//----------------------------------------------------------------------------- + +// Section 8.5.1 - AND (AND) +defm AND : RRm<"and", 0x44, I64, i64, and>; +let isCodeGenOnly = 1 in defm AND32 : RRm<"and", 0x44, I32, i32, and>; + +// Section 8.5.2 - OR 
(OR) +defm OR : RRm<"or", 0x45, I64, i64, or>; +let isCodeGenOnly = 1 in defm OR32 : RRm<"or", 0x45, I32, i32, or>; + +// Section 8.5.3 - XOR (Exclusive OR) +defm XOR : RRm<"xor", 0x46, I64, i64, xor>; +let isCodeGenOnly = 1 in defm XOR32 : RRm<"xor", 0x46, I32, i32, xor>; + +// Section 8.5.4 - EQV (Equivalence) +defm EQV : RRm<"eqv", 0x47, I64, i64>; + +// Section 8.5.5 - NND (Negate AND) +def and_not : PatFrags<(ops node:$x, node:$y), + [(and (not node:$x), node:$y)]>; +defm NND : RRNCm<"nnd", 0x54, I64, i64, and_not>; + +// Section 8.5.6 - MRG (Merge) +defm MRG : RRMRGm<"mrg", 0x56, I64, i64>; + +// Section 8.5.7 - LDZ (Leading Zero Count) +defm LDZ : RRI1m<"ldz", 0x67, I64, i64, ctlz>; + +// Section 8.5.8 - PCNT (Population Count) +defm PCNT : RRI1m<"pcnt", 0x38, I64, i64, ctpop>; + +// Section 8.5.9 - BRV (Bit Reverse) +defm BRV : RRI1m<"brv", 0x39, I64, i64, bitreverse>; + +// Section 8.5.10 - BSWP (Byte Swap) +defm BSWP : RRSWPm<"bswp", 0x2B, I64, i64>; + +// Section 8.5.11 - CMOV (Conditional Move) +let cw = 0, cw2 = 0 in defm CMOVL : RRCMOVm<"cmov.l.${cfw}", 0x3B, I64, i64>; +let cw = 1, cw2 = 0 in defm CMOVW : RRCMOVm<"cmov.w.${cfw}", 0x3B, I32, i32>; +let cw = 0, cw2 = 1 in defm CMOVD : RRCMOVm<"cmov.d.${cfw}", 0x3B, I64, f64>; +let cw = 1, cw2 = 1 in defm CMOVS : RRCMOVm<"cmov.s.${cfw}", 0x3B, F32, f32>; +def : MnemonicAlias<"cmov.l", "cmov.l.at">; +def : MnemonicAlias<"cmov.w", "cmov.w.at">; +def : MnemonicAlias<"cmov.d", "cmov.d.at">; +def : MnemonicAlias<"cmov.s", "cmov.s.at">; + +//----------------------------------------------------------------------------- +// Section 8.6 - Shift Operation Instructions +//----------------------------------------------------------------------------- + +// Section 8.6.1 - SLL (Shift Left Logical) +defm SLL : RRIm<"sll", 0x65, I64, i64, shl>; + +// Section 8.6.2 - SLD (Shift Left Double) +defm SLD : RRILDm<"sld", 0x64, I64, i64>; + +// Section 8.6.3 - SRL (Shift Right Logical) +defm SRL : RRIm<"srl", 0x75, I64, i64, srl>; + +// Section 8.6.4 - SRD (Shift Right Double) +defm SRD : RRIRDm<"srd", 0x74, I64, i64>; + +// Section 8.6.5 - SLA (Shift Left Arithmetic) +defm SLAWSX : RRIm<"sla.w.sx", 0x66, I32, i32, shl>; +let cx = 1 in defm SLAWZX : RRIm<"sla.w.zx", 0x66, I32, i32>; + +// Section 8.6.6 - SLAX (Shift Left Arithmetic) +defm SLAL : RRIm<"sla.l", 0x57, I64, i64>; + +// Section 8.6.7 - SRA (Shift Right Arithmetic) +defm SRAWSX : RRIm<"sra.w.sx", 0x76, I32, i32, sra>; +let cx = 1 in defm SRAWZX : RRIm<"sra.w.zx", 0x76, I32, i32>; + +// Section 8.6.8 - SRAX (Shift Right Arithmetic) +defm SRAL : RRIm<"sra.l", 0x77, I64, i64, sra>; + +def : Pat<(i32 (srl i32:$src, (i32 simm7:$val))), + (EXTRACT_SUBREG (SRLri (ANDrm (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + $src, sub_i32), !add(32, 64)), imm:$val), sub_i32)>; +def : Pat<(i32 (srl i32:$src, i32:$val)), + (EXTRACT_SUBREG (SRLrr (ANDrm (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + $src, sub_i32), !add(32, 64)), $val), sub_i32)>; + +//----------------------------------------------------------------------------- +// Section 8.7 - Floating-point Arithmetic Instructions +//----------------------------------------------------------------------------- + +// Section 8.7.1 - FAD (Floating Add) +defm FADDD : RRFm<"fadd.d", 0x4C, I64, f64, fadd>; +let cx = 1 in +defm FADDS : RRFm<"fadd.s", 0x4C, F32, f32, fadd, simm7fp, mimmfp32>; + +// Section 8.7.2 - FSB (Floating Subtract) +defm FSUBD : RRFm<"fsub.d", 0x5C, I64, f64, fsub>; +let cx = 1 in +defm FSUBS : RRFm<"fsub.s", 0x5C, F32, f32, fsub, simm7fp, mimmfp32>; + 
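+// Reference note on the mimm operands used throughout this file (encoding
+// inferred from the patterns in this patch, e.g. the and-with-0xffffffff
+// pattern below): an m-immediate denotes a 64-bit mask, printed "(m)0" for
+// m leading zeros followed by ones and "(m)1" for m leading ones followed
+// by zeros; in patterns "(m)0" is written as the operand value !add(m, 64):
+//   (ANDrm $x, !add(32, 64))    ; and with (32)0 = 0x00000000ffffffff
+//   (AND32rm $x, !add(56, 64))  ; and with (56)0 = 0x00000000000000ff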
+// Section 8.7.3 - FMP (Floating Multiply)
+defm FMULD : RRFm<"fmul.d", 0x4D, I64, f64, fmul>;
+let cx = 1 in
+defm FMULS : RRFm<"fmul.s", 0x4D, F32, f32, fmul, simm7fp, mimmfp32>;
+
+// Section 8.7.4 - FDV (Floating Divide)
+defm FDIVD : RRFm<"fdiv.d", 0x5D, I64, f64, fdiv>;
+let cx = 1 in
+defm FDIVS : RRFm<"fdiv.s", 0x5D, F32, f32, fdiv, simm7fp, mimmfp32>;
+
+// Section 8.7.5 - FCP (Floating Compare)
+defm FCMPD : RRFm<"fcmp.d", 0x7E, I64, f64>;
+let cx = 1 in
+defm FCMPS : RRFm<"fcmp.s", 0x7E, F32, f32, null_frag, simm7fp, mimmfp32>;
+
+// Section 8.7.6 - FCM (Floating Compare and Select Maximum/Minimum)
+// cx: double/float, cw: max/min
+let cw = 0, cx = 0 in
+defm FMAXD : RRFm<"fmax.d", 0x3E, I64, f64, fmaxnum>;
+let cw = 0, cx = 1 in
+defm FMAXS : RRFm<"fmax.s", 0x3E, F32, f32, fmaxnum, simm7fp, mimmfp32>;
+let cw = 1, cx = 0 in
+defm FMIND : RRFm<"fmin.d", 0x3E, I64, f64, fminnum>;
+let cw = 1, cx = 1 in
+defm FMINS : RRFm<"fmin.s", 0x3E, F32, f32, fminnum, simm7fp, mimmfp32>;
+
+// Section 8.7.7 - FAQ (Floating Add Quadruple)
+defm FADDQ : RRFm<"fadd.q", 0x6C, F128, f128>;
+
+// Section 8.7.8 - FSQ (Floating Subtract Quadruple)
+defm FSUBQ : RRFm<"fsub.q", 0x7C, F128, f128>;
+
+// Section 8.7.9 - FMQ (Floating Multiply Quadruple)
+defm FMULQ : RRFm<"fmul.q", 0x6D, F128, f128>;
+
+// Section 8.7.10 - FCQ (Floating Compare Quadruple)
+defm FCMPQ : RRNCbm<"fcmp.q", 0x7D, I64, f64, F128, f128, null_frag, simm7fp,
+                    mimmfp>;
+
+// Section 8.7.11 - FIX (Convert to Fixed Point)
+// cx: double/float, cw: sx/zx, sz{3-0} = rounding mode
+let cx = 0, cw = 0 /* sign extend */ in
+defm CVTWDSX : CVTRDm<"cvt.w.d.sx", 0x4E, I32, i32, I64, f64>;
+let cx = 0, cw = 1 /* zero extend */ in
+defm CVTWDZX : CVTRDm<"cvt.w.d.zx", 0x4E, I32, i32, I64, f64>;
+let cx = 1, cw = 0 /* sign extend */ in
+defm CVTWSSX : CVTRDm<"cvt.w.s.sx", 0x4E, I32, i32, F32, f32>;
+let cx = 1, cw = 1 /* zero extend */ in
+defm CVTWSZX : CVTRDm<"cvt.w.s.zx", 0x4E, I32, i32, F32, f32>;
+
+// Section 8.7.12 - FIXX (Convert to Fixed Point)
+defm CVTLD : CVTRDm<"cvt.l.d", 0x4F, I64, i64, I64, f64>;
+
+// Section 8.7.13 - FLT (Convert to Floating Point)
+defm CVTDW : CVTm<"cvt.d.w", 0x5E, I64, f64, I32, i32, sint_to_fp>;
+let cx = 1 in
+defm CVTSW : CVTm<"cvt.s.w", 0x5E, F32, f32, I32, i32, sint_to_fp>;
+
+// Section 8.7.14 - FLTX (Convert to Floating Point)
+defm CVTDL : CVTm<"cvt.d.l", 0x5F, I64, f64, I64, i64, sint_to_fp>;
+
+// Section 8.7.15 - CVS (Convert to Single-format)
+defm CVTSD : CVTm<"cvt.s.d", 0x1F, F32, f32, I64, f64, fpround>;
+let cx = 1 in
+defm CVTSQ : CVTm<"cvt.s.q", 0x1F, F32, f32, F128, f128>;
+
+// Section 8.7.16 - CVD (Convert to Double-format)
+defm CVTDS : CVTm<"cvt.d.s", 0x0F, I64, f64, F32, f32, fpextend>;
+let cx = 1 in
+defm CVTDQ : CVTm<"cvt.d.q", 0x0F, I64, f64, F128, f128>;
+
+// Section 8.7.17 - CVQ (Convert to Quadruple-format)
+defm CVTQD : CVTm<"cvt.q.d", 0x2D, F128, f128, I64, f64>;
 let cx = 1 in
-defm LEASL : RMm<"lea.sl", 0x06, I64, i64, simm7Op64, simm32Op64>;
+defm CVTQS : CVTm<"cvt.q.s", 0x2D, F128, f128, F32, f32>;
+
+//-----------------------------------------------------------------------------
+// Section 8.8 - Branch instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.8.1 - BC (Branch on Condition)
+defm BCFL : BCm<"b${cond}.l", "b.l", "baf.l", 0x19, I64, simm7>;
+
+// Indirect branch aliases
+def : Pat<(brind I64:$reg), (BCFLari_t $reg, 0)>;
+def : Pat<(brind tblockaddress:$imm), (BCFLazi_t 0, $imm)>;
+
+// The return instruction is a
special case of jump. +let Uses = [SX10], bpf = 3 /* TAKEN */, cf = 15 /* AT */, cy = 0, sy = 0, + sz = 10 /* SX10 */, imm32 = 0, isReturn = 1, isTerminator = 1, + isBarrier = 1, isCodeGenOnly = 1, hasSideEffects = 0 in +def RET : CF<0x19, (outs), (ins), "b.l.t (, %s10)", [(retflag)]>; + +// Section 8.8.2 - BCS (Branch on Condition Single) +defm BCFW : BCm<"b${cond}.w", "b.w", "baf.w", 0x1B, I32, simm7>; + +// Section 8.8.3 - BCF (Branch on Condition Floating Point) +defm BCFD : BCm<"b${cond}.d", "b.d", "baf.d", 0x1C, I64, simm7fp>; +let cx = 1 in +defm BCFS : BCm<"b${cond}.s", "b.s", "baf.s", 0x1C, F32, simm7fp>; + +// Section 8.8.4 - BCR (Branch on Condition Relative) +let cx = 0, cx2 = 0 in +defm BRCFL : BCRm<"br${cf}.l", "br.l", "braf.l", 0x18, I64, simm7>; +let cx = 1, cx2 = 0 in +defm BRCFW : BCRm<"br${cf}.w", "br.w", "braf.w", 0x18, I32, simm7>; +let cx = 0, cx2 = 1 in +defm BRCFD : BCRm<"br${cf}.d", "br.d", "braf.d", 0x18, I64, simm7fp>; +let cx = 1, cx2 = 1 in +defm BRCFS : BCRm<"br${cf}.s", "br.s", "braf.s", 0x18, F32, simm7fp>; + +// Section 8.8.5 - BSIC (Branch and Save IC) +let isCall = 1, hasSideEffects = 0, DecoderMethod = "DecodeCall" in +defm BSIC : RMm<"bsic", 0x08, I64>; + +// Call instruction is a special case of BSIC. +let Defs = [SX10], sx = 10 /* SX10 */, cy = 0, sy = 0, imm32 = 0, + isCall = 1, isCodeGenOnly = 1, hasSideEffects = 0 in +def CALLr : RM<0x08, (outs), (ins I64:$sz, variable_ops), + "bsic %s10, (, $sz)", [(call i64:$sz)]>; -// 5.3.2.2. Fixed-Point Arithmetic Operation Instructions +//----------------------------------------------------------------------------- +// Section 8.19 - Control Instructions +//----------------------------------------------------------------------------- -// ADX instruction -let cx = 0 in -defm ADX : RRm<"adds.l", 0x59, I64, i64, simm7Op64, uimm6Op64>; +// Section 8.19.1 - SIC (Save Instruction Counter) +let cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 1, Uses = [IC] in +def SIC : RR<0x28, (outs I32:$sx), (ins), "sic $sx">; -// 5.3.2.3. 
Logical Arithmetic Operation Instructions +// Section 8.19.2 - LPM (Load Program Mode Flags) +let sx = 0, cz = 0, sz = 0, hasSideEffects = 1, Defs = [PSW] in +def LPM : RR<0x3a, (outs), (ins I64:$sy), "lpm $sy">; -let cx = 0 in { - defm AND : RRm<"and", 0x44, I64, i64, simm7Op64, uimm6Op64>; - defm OR : RRm<"or", 0x45, I64, i64, simm7Op64, uimm6Op64>; +// Section 8.19.3 - SPM (Save Program Mode Flags) +let cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 1, Uses = [PSW] in +def SPM : RR<0x2a, (outs I64:$sx), (ins), "spm $sx">; + +// Section 8.19.4 - LFR (Load Flag Register) +let sx = 0, cz = 0, sz = 0, hasSideEffects = 1, Defs = [PSW] in { + def LFRr : RR<0x69, (outs), (ins I64:$sy), "lfr $sy">; + let cy = 0 in def LFRi : RR<0x69, (outs), (ins uimm6:$sy), "lfr $sy">; } -// Load and Store instructions -// As 1st step, only uses sz and imm32 to represent $addr -let mayLoad = 1, hasSideEffects = 0 in { -let cy = 0, sy = 0, cz = 1 in { -let cx = 0 in -def LDSri : RM< - 0x01, (outs I64:$sx), (ins MEMri:$addr), - "ld $sx, $addr">; +// Section 8.19.5 - SFR (Save Flag Register) +let cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 1, Uses = [PSW] in +def SFR : RR<0x29, (outs I64:$sx), (ins), "sfr $sx">; + +// Section 8.19.6 - SMIR (Save Miscellaneous Register) +let cy = 0, cz = 0, sz = 0, hasSideEffects = 1 in { + def SMIR : RR<0x22, (outs I64:$sx), (ins MISC:$sy), "smir $sx, $sy">; +} + +// Section 8.19.7 - NOP (No Operation) +let sx = 0, cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0 in +def NOP : RR<0x79, (outs), (ins), "nop">; + +// Section 8.19.8 - MONC (Monitor Call) +let sx = 0, cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 1 in { + def MONC : RR<0x3F, (outs), (ins), "monc">; + let cx = 1, isTrap = 1 in def MONCHDB : RR<0x3F, (outs), (ins), "monc.hdb">; } + +// Section 8.19.9 - LCR (Load Communication Register) +defm LCR : LOADCRm<"lcr", 0x40, I64>; + +// Section 8.19.10 - SCR (Save Communication Register) +defm SCR : STORECRm<"scr", 0x50, I64>; + +// Section 8.19.11 - TSCR (Test & Set Communication Register) +defm TSCR : LOADCRm<"tscr", 0x41, I64>; + +// Section 8.19.12 - FIDCR (Fetch & Increment/Decrement CR) +defm FIDCR : FIDCRm<"fidcr", 0x51, I64>; + +//----------------------------------------------------------------------------- +// Section 8.20 - Host Memory Access Instructions +//----------------------------------------------------------------------------- + +// Section 8.20.1 - LHM (Load Host Memory) +let ry = 3, DecoderMethod = "DecodeLoadASI64" in +defm LHML : LHMm<"lhm.l", 0x21, I64>; +let ry = 2, DecoderMethod = "DecodeLoadASI64" in +defm LHMW : LHMm<"lhm.w", 0x21, I64>; +let ry = 1, DecoderMethod = "DecodeLoadASI64" in +defm LHMH : LHMm<"lhm.h", 0x21, I64>; +let ry = 0, DecoderMethod = "DecodeLoadASI64" in +defm LHMB : LHMm<"lhm.b", 0x21, I64>; + +// Section 8.20.2 - SHM (Store Host Memory) +let ry = 3, DecoderMethod = "DecodeStoreASI64" in +defm SHML : SHMm<"shm.l", 0x31, I64>; +let ry = 2, DecoderMethod = "DecodeStoreASI64" in +defm SHMW : SHMm<"shm.w", 0x31, I64>; +let ry = 1, DecoderMethod = "DecodeStoreASI64" in +defm SHMH : SHMm<"shm.h", 0x31, I64>; +let ry = 0, DecoderMethod = "DecodeStoreASI64" in +defm SHMB : SHMm<"shm.b", 0x31, I64>; + +//===----------------------------------------------------------------------===// +// Instructions for CodeGenOnly +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Pattern Matchings 
+//===----------------------------------------------------------------------===// + +// Small immediates. +def : Pat<(i32 simm7:$val), (OR32im (LO7 $val), 0)>; +def : Pat<(i64 simm7:$val), (ORim (LO7 $val), 0)>; +// Medium immediates. +def : Pat<(i32 simm32:$val), (LEA32zii 0, 0, (LO32 $val))>; +def : Pat<(i64 simm32:$val), (LEAzii 0, 0, (LO32 $val))>; +def : Pat<(i64 uimm32:$val), (ANDrm (LEAzii 0, 0, (LO32 $val)), !add(32, 64))>; +// Arbitrary immediates. +def : Pat<(i64 lozero:$val), + (LEASLzii 0, 0, (HI32 imm:$val))>; +def : Pat<(i64 lomsbzero:$val), + (LEASLrii (LEAzii 0, 0, (LO32 imm:$val)), 0, (HI32 imm:$val))>; +def : Pat<(i64 imm:$val), + (LEASLrii (ANDrm (LEAzii 0, 0, (LO32 imm:$val)), !add(32, 64)), 0, + (HI32 imm:$val))>; + +// floating point +def : Pat<(f32 fpimm:$val), + (EXTRACT_SUBREG (LEASLzii 0, 0, (HIFP32 $val)), sub_f32)>; +def : Pat<(f64 fplozero:$val), + (LEASLzii 0, 0, (HIFP32 $val))>; +def : Pat<(f64 fplomsbzero:$val), + (LEASLrii (LEAzii 0, 0, (LOFP32 $val)), 0, (HIFP32 $val))>; +def : Pat<(f64 fpimm:$val), + (LEASLrii (ANDrm (LEAzii 0, 0, (LOFP32 $val)), !add(32, 64)), 0, + (HIFP32 $val))>; + +// The same integer registers are used for i32 and i64 values. +// When registers hold i32 values, the high bits are unused. + +// TODO Use standard expansion for shift-based lowering of sext_inreg + +// Cast to i1 +def : Pat<(sext_inreg I32:$src, i1), + (SRAWSXri (SLAWSXri $src, 31), 31)>; +def : Pat<(sext_inreg I64:$src, i1), + (SRALri (SLLri $src, 63), 63)>; + +// Cast to i8 +def : Pat<(sext_inreg I32:$src, i8), + (SRAWSXri (SLAWSXri $src, 24), 24)>; +def : Pat<(sext_inreg I64:$src, i8), + (SRALri (SLLri $src, 56), 56)>; +def : Pat<(sext_inreg (i32 (trunc i64:$src)), i8), + (EXTRACT_SUBREG (SRALri (SLLri $src, 56), 56), sub_i32)>; +def : Pat<(and (trunc i64:$src), 0xff), + (AND32rm (EXTRACT_SUBREG $src, sub_i32), !add(56, 64))>; + +// Cast to i16 +def : Pat<(sext_inreg I32:$src, i16), + (SRAWSXri (SLAWSXri $src, 16), 16)>; +def : Pat<(sext_inreg I64:$src, i16), + (SRALri (SLLri $src, 48), 48)>; +def : Pat<(sext_inreg (i32 (trunc i64:$src)), i16), + (EXTRACT_SUBREG (SRALri (SLLri $src, 48), 48), sub_i32)>; +def : Pat<(and (trunc i64:$src), 0xffff), + (AND32rm (EXTRACT_SUBREG $src, sub_i32), !add(48, 64))>; + +// Cast to i32 +def : Pat<(i32 (trunc i64:$src)), + (ADDSWSXrm (EXTRACT_SUBREG $src, sub_i32), 0)>; +def : Pat<(i32 (fp_to_sint I64:$reg)), (CVTWDSXr RD_RZ, $reg)>; +def : Pat<(i32 (fp_to_sint F32:$reg)), (CVTWSSXr RD_RZ, $reg)>; + +// Cast to i64 +def : Pat<(sext_inreg I64:$src, i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (ADDSWSXrm (EXTRACT_SUBREG $src, sub_i32), 0), sub_i32)>; +def : Pat<(i64 (sext i32:$sy)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWSXrm $sy, 0), sub_i32)>; +def : Pat<(i64 (zext i32:$sy)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWZXrm $sy, 0), sub_i32)>; +def : Pat<(i64 (fp_to_sint f32:$sy)), (CVTLDr RD_RZ, (CVTDSr $sy))>; +def : Pat<(i64 (fp_to_sint I64:$reg)), (CVTLDr RD_RZ, $reg)>; + +// Cast to f32 +def : Pat<(f32 (sint_to_fp i64:$sy)), (CVTSDr (CVTDLr i64:$sy))>; + +def : Pat<(i64 (anyext i32:$sy)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $sy, sub_i32)>; + + +// extload, sextload and zextload stuff +multiclass EXT64m<SDPatternOperator from, + SDPatternOperator torri, + SDPatternOperator torii, + SDPatternOperator tozri, + SDPatternOperator tozii> { + def : Pat<(i64 (from ADDRrri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (torri MEMrri:$addr), + sub_i32)>; + def : Pat<(i64 (from ADDRrii:$addr)), + (INSERT_SUBREG (i64 
(IMPLICIT_DEF)), (torii MEMrii:$addr),
+                           sub_i32)>;
+  def : Pat<(i64 (from ADDRzri:$addr)),
+            (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (tozri MEMzri:$addr),
+                           sub_i32)>;
+  def : Pat<(i64 (from ADDRzii:$addr)),
+            (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (tozii MEMzii:$addr),
+                           sub_i32)>;
 }
+defm : EXT64m<sextloadi8, LD1BSXrri, LD1BSXrii, LD1BSXzri, LD1BSXzii>;
+defm : EXT64m<zextloadi8, LD1BZXrri, LD1BZXrii, LD1BZXzri, LD1BZXzii>;
+defm : EXT64m<extloadi8, LD1BZXrri, LD1BZXrii, LD1BZXzri, LD1BZXzii>;
+defm : EXT64m<sextloadi16, LD2BSXrri, LD2BSXrii, LD2BSXzri, LD2BSXzii>;
+defm : EXT64m<zextloadi16, LD2BZXrri, LD2BZXrii, LD2BZXzri, LD2BZXzii>;
+defm : EXT64m<extloadi16, LD2BZXrri, LD2BZXrii, LD2BZXzri, LD2BZXzii>;
+defm : EXT64m<sextloadi32, LDLSXrri, LDLSXrii, LDLSXzri, LDLSXzii>;
+defm : EXT64m<zextloadi32, LDLZXrri, LDLZXrii, LDLZXzri, LDLZXzii>;
+defm : EXT64m<extloadi32, LDLSXrri, LDLSXrii, LDLSXzri, LDLSXzii>;
 
-let mayStore = 1, hasSideEffects = 0 in {
-let cx = 0, cy = 0, sy = 0, cz = 1 in {
-def STSri : RM<
-    0x11, (outs), (ins MEMri:$addr, I64:$sx),
-    "st $sx, $addr">;
+// anyextload
+multiclass EXT32m<SDPatternOperator from,
+                  SDPatternOperator torri,
+                  SDPatternOperator torii,
+                  SDPatternOperator tozri,
+                  SDPatternOperator tozii> {
+  def : Pat<(from ADDRrri:$addr), (torri MEMrri:$addr)>;
+  def : Pat<(from ADDRrii:$addr), (torii MEMrii:$addr)>;
+  def : Pat<(from ADDRzri:$addr), (tozri MEMzri:$addr)>;
+  def : Pat<(from ADDRzii:$addr), (tozii MEMzii:$addr)>;
 }
+defm : EXT32m<extloadi8, LD1BZXrri, LD1BZXrii, LD1BZXzri, LD1BZXzii>;
+defm : EXT32m<extloadi16, LD2BZXrri, LD2BZXrii, LD2BZXzri, LD2BZXzii>;
+
+// truncstore
+multiclass TRUNC64m<SDPatternOperator from,
+                    SDPatternOperator torri,
+                    SDPatternOperator torii,
+                    SDPatternOperator tozri,
+                    SDPatternOperator tozii> {
+  def : Pat<(from i64:$src, ADDRrri:$addr),
+            (torri MEMrri:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
+  def : Pat<(from i64:$src, ADDRrii:$addr),
+            (torii MEMrii:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
+  def : Pat<(from i64:$src, ADDRzri:$addr),
+            (tozri MEMzri:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
+  def : Pat<(from i64:$src, ADDRzii:$addr),
+            (tozii MEMzii:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
 }
+defm : TRUNC64m<truncstorei8, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>;
+defm : TRUNC64m<truncstorei16, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>;
+defm : TRUNC64m<truncstorei32, STLrri, STLrii, STLzri, STLzii>;
+
+// Address calculation and its optimization
+def : Pat<(VEhi tglobaladdr:$in), (LEASLzii 0, 0, tglobaladdr:$in)>;
+def : Pat<(VElo tglobaladdr:$in),
+          (ANDrm (LEAzii 0, 0, tglobaladdr:$in), !add(32, 64))>;
+def : Pat<(add (VEhi tglobaladdr:$in1), (VElo tglobaladdr:$in2)),
+          (LEASLrii (ANDrm (LEAzii 0, 0, tglobaladdr:$in2), !add(32, 64)), 0,
+                    (tglobaladdr:$in1))>;
 
-// Return instruction is also a special case of jump.
-let cx = 0, cx2 = 0, bpf = 0 /* NONE */, cf = 15 /* AT */, cy = 0, sy = 0, - cz = 1, sz = 0x10 /* SX10 */, imm32 = 0, Uses = [SX10], - isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1, - isCodeGenOnly = 1, hasSideEffects = 0 in -def RET : CF< - 0x19, (outs), (ins), - "b.l (,%lr)", - [(retflag)]>; +// GlobalTLS address calculation and its optimization +def : Pat<(VEhi tglobaltlsaddr:$in), (LEASLzii 0, 0, tglobaltlsaddr:$in)>; +def : Pat<(VElo tglobaltlsaddr:$in), + (ANDrm (LEAzii 0, 0, tglobaltlsaddr:$in), !add(32, 64))>; +def : Pat<(add (VEhi tglobaltlsaddr:$in1), (VElo tglobaltlsaddr:$in2)), + (LEASLrii (ANDrm (LEAzii 0, 0, tglobaltlsaddr:$in2), !add(32, 64)), 0, + (tglobaltlsaddr:$in1))>; -// Branch instruction -let cx = 0, cx2 = 0, bpf = 0 /* NONE */ in -defm BCRL : BCRm<"br${cf}.l", "br.l", 0x18, I64, i64, simm7Op64, uimm6Op64>; +// Address calculation and its optimization +def : Pat<(VEhi texternalsym:$in), (LEASLzii 0, 0, texternalsym:$in)>; +def : Pat<(VElo texternalsym:$in), + (ANDrm (LEAzii 0, 0, texternalsym:$in), !add(32, 64))>; +def : Pat<(add (VEhi texternalsym:$in1), (VElo texternalsym:$in2)), + (LEASLrii (ANDrm (LEAzii 0, 0, texternalsym:$in2), !add(32, 64)), 0, + (texternalsym:$in1))>; -let cx = 0, cy = 0, cz = 1, hasSideEffects = 0 in { -let sy = 3 in -def SHMri : RM< - 0x31, (outs), (ins MEMASri:$addr, I64:$sx), - "shm.l $sx, $addr">; +// Branches +def : Pat<(br bb:$addr), (BRCFLa bb:$addr)>; + +// brcc +// integer brcc +multiclass BRCCIm<ValueType ty, SDPatternOperator BrOpNode1, + SDPatternOperator BrOpNode2, + SDPatternOperator CmpOpNode1, + SDPatternOperator CmpOpNode2> { + def : Pat<(brcc CCSIOp:$cond, ty:$l, simm7:$r, bb:$addr), + (BrOpNode2 (icond2ccSwap $cond), (LO7 $r), $l, bb:$addr)>; + def : Pat<(brcc CCSIOp:$cond, ty:$l, ty:$r, bb:$addr), + (BrOpNode1 (icond2cc $cond), $l, $r, bb:$addr)>; + def : Pat<(brcc CCUIOp:$cond, ty:$l, simm7:$r, bb:$addr), + (BrOpNode2 (icond2cc $cond), 0, (CmpOpNode2 (LO7 $r), $l), + bb:$addr)>; + def : Pat<(brcc CCUIOp:$cond, ty:$l, ty:$r, bb:$addr), + (BrOpNode2 (icond2cc $cond), 0, (CmpOpNode1 $r, $l), bb:$addr)>; } +defm : BRCCIm<i32, BRCFWrr, BRCFWir, CMPUWrr, CMPUWir>; +defm : BRCCIm<i64, BRCFLrr, BRCFLir, CMPULrr, CMPULir>; -let cx = 0, sx = 0, cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0 in -def MONC : RR< - 0x3F, (outs), (ins), - "monc">; +// floating point brcc +multiclass BRCCFm<ValueType ty, SDPatternOperator BrOpNode1, + SDPatternOperator BrOpNode2> { + def : Pat<(brcc cond:$cond, ty:$l, simm7fp:$r, bb:$addr), + (BrOpNode2 (fcond2ccSwap $cond), (LO7FP $r), $l, bb:$addr)>; + def : Pat<(brcc cond:$cond, ty:$l, ty:$r, bb:$addr), + (BrOpNode1 (fcond2cc $cond), $l, $r, bb:$addr)>; +} +defm : BRCCFm<f32, BRCFSrr, BRCFSir>; +defm : BRCCFm<f64, BRCFDrr, BRCFDir>; //===----------------------------------------------------------------------===// // Pseudo Instructions //===----------------------------------------------------------------------===// +// GETGOT for PIC +let Defs = [SX15 /* %got */, SX16 /* %plt */], hasSideEffects = 0 in { + def GETGOT : Pseudo<(outs getGOT:$getpcseq), (ins), "$getpcseq">; +} + +// GETFUNPLT for PIC +let hasSideEffects = 0 in +def GETFUNPLT : Pseudo<(outs I64:$dst), (ins i64imm:$addr), + "$dst, $addr", + [(set iPTR:$dst, (GetFunPLT tglobaladdr:$addr))] >; + +def : Pat<(GetFunPLT tglobaladdr:$dst), + (GETFUNPLT tglobaladdr:$dst)>; +def : Pat<(GetFunPLT texternalsym:$dst), + (GETFUNPLT texternalsym:$dst)>; + +// GETTLSADDR for TLS +let Defs = [SX0, SX10, SX12], hasSideEffects = 0 in 
+def GETTLSADDR : Pseudo<(outs), (ins i64imm:$addr),
+                        "# GETTLSADDR $addr",
+                        [(GetTLSAddr tglobaltlsaddr:$addr)] >;
+
+def : Pat<(GetTLSAddr tglobaltlsaddr:$dst),
+          (GETTLSADDR tglobaltlsaddr:$dst)>;
+
 let Defs = [SX11], Uses = [SX11], hasSideEffects = 0 in {
 def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt, i64imm:$amt2),
                               "# ADJCALLSTACKDOWN $amt, $amt2",
@@ -286,3 +1728,278 @@ let hasSideEffects = 0 in
 def EXTEND_STACK_GUARD : Pseudo<(outs), (ins),
                                 "# EXTEND STACK GUARD",
                                 []>;
+
+// Dynamic stack allocation yields a __llvm_grow_stack call for VE targets.
+// These calls are needed to probe the stack when allocating beyond %s8
+// (%sl, the stack limit).
+
+let Uses = [SX11], hasSideEffects = 1 in
+def GETSTACKTOP : Pseudo<(outs I64:$dst), (ins),
+                         "# GET STACK TOP",
+                         [(set iPTR:$dst, (GetStackTop))]>;
+// SETCC pattern matches
+//
+//   CMP  %tmp, lhs, rhs       ; compare lhs and rhs
+//   or   %res, 0, (0)1        ; initialize by 0
+//   CMOV %res, (63)0, %tmp    ; set 1 if %tmp is true
+
+def : Pat<(i32 (setcc i64:$LHS, i64:$RHS, CCSIOp:$cond)),
+          (EXTRACT_SUBREG
+              (CMOVLrm (icond2cc $cond),
+                       (CMPSLrr i64:$LHS, i64:$RHS),
+                       !add(63, 64),
+                       (ORim 0, 0)), sub_i32)>;
+
+def : Pat<(i32 (setcc i64:$LHS, i64:$RHS, CCUIOp:$cond)),
+          (EXTRACT_SUBREG
+              (CMOVLrm (icond2cc $cond),
+                       (CMPULrr i64:$LHS, i64:$RHS),
+                       !add(63, 64),
+                       (ORim 0, 0)), sub_i32)>;
+
+def : Pat<(i32 (setcc i32:$LHS, i32:$RHS, CCSIOp:$cond)),
+          (EXTRACT_SUBREG
+              (CMOVWrm (icond2cc $cond),
+                       (CMPSWSXrr i32:$LHS, i32:$RHS),
+                       !add(63, 64),
+                       (ORim 0, 0)), sub_i32)>;
+
+def : Pat<(i32 (setcc i32:$LHS, i32:$RHS, CCUIOp:$cond)),
+          (EXTRACT_SUBREG
+              (CMOVWrm (icond2cc $cond),
+                       (CMPUWrr i32:$LHS, i32:$RHS),
+                       !add(63, 64),
+                       (ORim 0, 0)), sub_i32)>;
+
+def : Pat<(i32 (setcc f64:$LHS, f64:$RHS, cond:$cond)),
+          (EXTRACT_SUBREG
+              (CMOVDrm (fcond2cc $cond),
+                       (FCMPDrr f64:$LHS, f64:$RHS),
+                       !add(63, 64),
+                       (ORim 0, 0)), sub_i32)>;
+
+def : Pat<(i32 (setcc f32:$LHS, f32:$RHS, cond:$cond)),
+          (EXTRACT_SUBREG
+              (CMOVSrm (fcond2cc $cond),
+                       (FCMPSrr f32:$LHS, f32:$RHS),
+                       !add(63, 64),
+                       (ORim 0, 0)), sub_i32)>;
+
+// Special SELECTCC pattern matches
+// Use min/max for better performance.
+// +// MAX/MIN %res, %lhs, %rhs + +def : Pat<(f64 (selectcc f64:$LHS, f64:$RHS, f64:$LHS, f64:$RHS, SETOGT)), + (FMAXDrr $LHS, $RHS)>; +def : Pat<(f32 (selectcc f32:$LHS, f32:$RHS, f32:$LHS, f32:$RHS, SETOGT)), + (FMAXSrr $LHS, $RHS)>; +def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETGT)), + (MAXSLrr $LHS, $RHS)>; +def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETGT)), + (MAXSWSXrr $LHS, $RHS)>; +def : Pat<(f64 (selectcc f64:$LHS, f64:$RHS, f64:$LHS, f64:$RHS, SETOGE)), + (FMAXDrr $LHS, $RHS)>; +def : Pat<(f32 (selectcc f32:$LHS, f32:$RHS, f32:$LHS, f32:$RHS, SETOGE)), + (FMAXSrr $LHS, $RHS)>; +def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETGE)), + (MAXSLrr $LHS, $RHS)>; +def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETGE)), + (MAXSWSXrr $LHS, $RHS)>; + +def : Pat<(f64 (selectcc f64:$LHS, f64:$RHS, f64:$LHS, f64:$RHS, SETOLT)), + (FMINDrr $LHS, $RHS)>; +def : Pat<(f32 (selectcc f32:$LHS, f32:$RHS, f32:$LHS, f32:$RHS, SETOLT)), + (FMINSrr $LHS, $RHS)>; +def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETLT)), + (MINSLrr $LHS, $RHS)>; +def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETLT)), + (MINSWSXrr $LHS, $RHS)>; +def : Pat<(f64 (selectcc f64:$LHS, f64:$RHS, f64:$LHS, f64:$RHS, SETOLE)), + (FMINDrr $LHS, $RHS)>; +def : Pat<(f32 (selectcc f32:$LHS, f32:$RHS, f32:$LHS, f32:$RHS, SETOLE)), + (FMINSrr $LHS, $RHS)>; +def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETLE)), + (MINSLrr $LHS, $RHS)>; +def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETLE)), + (MINSWSXrr $LHS, $RHS)>; + +// Generic SELECTCC pattern matches +// +// CMP %tmp, %l, %r ; compare %l and %r +// or %res, %f, (0)1 ; initialize by %f +// CMOV %res, %t, %tmp ; set %t if %tmp is true + +// selectcc for i64 result +def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCSIOp:$cond)), + (CMOVWrr (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCUIOp:$cond)), + (CMOVWrr (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCSIOp:$cond)), + (CMOVLrr (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCUIOp:$cond)), + (CMOVLrr (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc f32:$l, f32:$r, i64:$t, i64:$f, cond:$cond)), + (CMOVSrr (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc f64:$l, f64:$r, i64:$t, i64:$f, cond:$cond)), + (CMOVDrr (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>; + +// selectcc for i32 result +def : Pat<(i32 (selectcc i32:$l, i32:$r, i32:$t, i32:$f, CCSIOp:$cond)), + (EXTRACT_SUBREG + (CMOVWrr (icond2cc $cond), + (CMPSWSXrr $l, $r), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; +def : Pat<(i32 (selectcc i32:$l, i32:$r, i32:$t, i32:$f, CCUIOp:$cond)), + (EXTRACT_SUBREG + (CMOVWrr (icond2cc $cond), + (CMPUWrr $l, $r), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; +def : Pat<(i32 (selectcc i64:$l, i64:$r, i32:$t, i32:$f, CCSIOp:$cond)), + (EXTRACT_SUBREG + (CMOVLrr (icond2cc $cond), + (CMPSLrr $l, $r), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; +def : Pat<(i32 (selectcc i64:$l, i64:$r, i32:$t, i32:$f, CCUIOp:$cond)), + (EXTRACT_SUBREG + 
(CMOVLrr (icond2cc $cond), + (CMPULrr $l, $r), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; +def : Pat<(i32 (selectcc f32:$l, f32:$r, i32:$t, i32:$f, cond:$cond)), + (EXTRACT_SUBREG + (CMOVSrr (fcond2cc $cond), + (FCMPSrr $l, $r), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; +def : Pat<(i32 (selectcc f64:$l, f64:$r, i32:$t, i32:$f, cond:$cond)), + (EXTRACT_SUBREG + (CMOVDrr (fcond2cc $cond), + (FCMPDrr $l, $r), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; + +// selectcc for f64 result +def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCSIOp:$cond)), + (CMOVWrr (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCUIOp:$cond)), + (CMOVWrr (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCSIOp:$cond)), + (CMOVLrr (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCUIOp:$cond)), + (CMOVLrr (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc f32:$l, f32:$r, f64:$t, f64:$f, cond:$cond)), + (CMOVSrr (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc f64:$l, f64:$r, f64:$t, f64:$f, cond:$cond)), + (CMOVDrr (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>; + +// selectcc for f32 result +def : Pat<(f32 (selectcc i32:$l, i32:$r, f32:$t, f32:$f, CCSIOp:$cond)), + (EXTRACT_SUBREG + (CMOVWrr (icond2cc $cond), + (CMPSWSXrr $l, $r), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; +def : Pat<(f32 (selectcc i32:$l, i32:$r, f32:$t, f32:$f, CCUIOp:$cond)), + (EXTRACT_SUBREG + (CMOVWrr (icond2cc $cond), + (CMPUWrr $l, $r), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; +def : Pat<(f32 (selectcc i64:$l, i64:$r, f32:$t, f32:$f, CCSIOp:$cond)), + (EXTRACT_SUBREG + (CMOVLrr (icond2cc $cond), + (CMPSLrr $l, $r), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; +def : Pat<(f32 (selectcc i64:$l, i64:$r, f32:$t, f32:$f, CCUIOp:$cond)), + (EXTRACT_SUBREG + (CMOVLrr (icond2cc $cond), + (CMPULrr $l, $r), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; +def : Pat<(f32 (selectcc f32:$l, f32:$r, f32:$t, f32:$f, cond:$cond)), + (EXTRACT_SUBREG + (CMOVSrr (fcond2cc $cond), + (FCMPSrr $l, $r), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; +def : Pat<(f32 (selectcc f64:$l, f64:$r, f32:$t, f32:$f, cond:$cond)), + (EXTRACT_SUBREG + (CMOVDrr (fcond2cc $cond), + (FCMPDrr $l, $r), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; + +// Generic SELECT pattern matches +// Use cmov.w for all cases since %pred holds i32. 
+//
+// CMOV.w.ne %res, %tval, %tmp ; set %tval if %tmp is true
+
+def : Pat<(i64 (select i32:$pred, i64:$t, i64:$f)),
+          (CMOVWrr CC_INE, $pred, $t, $f)>;
+
+def : Pat<(i32 (select i32:$pred, i32:$t, i32:$f)),
+          (EXTRACT_SUBREG
+              (CMOVWrr CC_INE, $pred,
+                       (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
+                       (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
+              sub_i32)>;
+
+def : Pat<(f64 (select i32:$pred, f64:$t, f64:$f)),
+          (CMOVWrr CC_INE, $pred, $t, $f)>;
+
+def : Pat<(f32 (select i32:$pred, f32:$t, f32:$f)),
+          (EXTRACT_SUBREG
+              (CMOVWrr CC_INE, $pred,
+                       (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_f32),
+                       (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_f32)),
+              sub_f32)>;
+
+// bitconvert
+def : Pat<(f64 (bitconvert i64:$src)), (COPY_TO_REGCLASS $src, I64)>;
+def : Pat<(i64 (bitconvert f64:$src)), (COPY_TO_REGCLASS $src, I64)>;
+
+def : Pat<(i32 (bitconvert f32:$op)),
+          (EXTRACT_SUBREG (SRALri (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                                 $op, sub_f32), 32), sub_i32)>;
+def : Pat<(f32 (bitconvert i32:$op)),
+          (EXTRACT_SUBREG (SLLri (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                                $op, sub_i32), 32), sub_f32)>;
+
+// Bit operations pattern matching.
+def : Pat<(i32 (ctpop i32:$src)),
+          (EXTRACT_SUBREG (PCNTr (ANDrm (INSERT_SUBREG
+              (i64 (IMPLICIT_DEF)), $src, sub_i32), !add(32, 64))), sub_i32)>;
+def : Pat<(i32 (ctlz i32:$src)),
+          (EXTRACT_SUBREG (LDZr (SLLri (INSERT_SUBREG
+              (i64 (IMPLICIT_DEF)), $src, sub_i32), 32)), sub_i32)>;
+def : Pat<(i64 (bswap i64:$src)),
+          (BSWPri $src, 0)>;
+def : Pat<(i32 (bswap i32:$src)),
+          (EXTRACT_SUBREG (BSWPri (INSERT_SUBREG
+              (i64 (IMPLICIT_DEF)), $src, sub_i32), 1), sub_i32)>;
+
+// Several special pattern matches to optimize code.
+
+def : Pat<(i32 (and i32:$lhs, 0xff)),
+          (AND32rm $lhs, !add(56, 64))>;
+def : Pat<(i32 (and i32:$lhs, 0xffff)),
+          (AND32rm $lhs, !add(48, 64))>;
+def : Pat<(i32 (and i32:$lhs, 0xffffffff)),
+          (AND32rm $lhs, !add(32, 64))>;
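A note on the !add(N, 64) immediates used above, assuming the usual VE "m"
immediate convention (the operand definition itself is outside this diff): a
TableGen value of 64+N selects the (N)0 form, a mask of N leading zero bits
followed by ones, while a value below 64 selects (N)1, N leading one bits
followed by zeros. Under that reading:

    !add(56, 64)  ->  (56)0  =  0x00000000000000ff   ; and with 0xff
    !add(48, 64)  ->  (48)0  =  0x000000000000ffff   ; and with 0xffff
    !add(32, 64)  ->  (32)0  =  0x00000000ffffffff   ; and with 0xffffffff,
                                                     ; also used by the ctpop
                                                     ; pattern to clear the
                                                     ; upper half before PCNT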
diff --git a/llvm/lib/Target/VE/VEMCInstLower.cpp b/llvm/lib/Target/VE/VEMCInstLower.cpp
index 6c8fc3536c346..9815610510e14 100644
--- a/llvm/lib/Target/VE/VEMCInstLower.cpp
+++ b/llvm/lib/Target/VE/VEMCInstLower.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/VEMCExpr.h"
 #include "VE.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -27,9 +28,16 @@ using namespace llvm;
 static MCOperand LowerSymbolOperand(const MachineInstr *MI,
                                     const MachineOperand &MO,
                                     const MCSymbol *Symbol, AsmPrinter &AP) {
+  VEMCExpr::VariantKind Kind = (VEMCExpr::VariantKind)MO.getTargetFlags();
 
-  const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Symbol, AP.OutContext);
-  return MCOperand::createExpr(MCSym);
+  const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, AP.OutContext);
+  // Add an offset iff MO is neither a jump table entry nor a machine basic block.
+  if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
+    Expr = MCBinaryExpr::createAdd(
+        Expr, MCConstantExpr::create(MO.getOffset(), AP.OutContext),
+        AP.OutContext);
+  Expr = VEMCExpr::create(Kind, Expr, AP.OutContext);
+  return MCOperand::createExpr(Expr);
 }
 
 static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO,
@@ -43,6 +51,11 @@ static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO,
       break;
     return MCOperand::createReg(MO.getReg());
 
+  case MachineOperand::MO_ExternalSymbol:
+    return LowerSymbolOperand(
+        MI, MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP);
+  case MachineOperand::MO_GlobalAddress:
+    return LowerSymbolOperand(MI, MO, AP.getSymbol(MO.getGlobal()), AP);
   case MachineOperand::MO_Immediate:
     return MCOperand::createImm(MO.getImm());
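For a reference to a global foo at byte offset 8 whose target flag requests a
low-32-bit relocation, LowerSymbolOperand above builds an expression tree of
roughly this shape (a sketch: foo, the offset, and the VK_VE_LO32 spelling are
illustrative assumptions; the actual variant kinds live in VEMCExpr.h, which
is not part of this hunk):

    // hypothetical result for "foo + 8" with a lo32 target flag:
    VEMCExpr(VK_VE_LO32,
             MCBinaryExpr::createAdd(MCSymbolRefExpr(foo),
                                     MCConstantExpr(8)))

which would render in the emitted assembly as something like foo+8@lo.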
diff --git a/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp b/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp
new file mode 100644
index 0000000000000..1addfc7174eb5
--- /dev/null
+++ b/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp
@@ -0,0 +1,13 @@
+//===-- VEMachineFunctionInfo.cpp - VE Machine Function Info --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "VEMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void VEMachineFunctionInfo::anchor() {}
diff --git a/llvm/lib/Target/VE/VEMachineFunctionInfo.h b/llvm/lib/Target/VE/VEMachineFunctionInfo.h
new file mode 100644
index 0000000000000..16b25fed3f11d
--- /dev/null
+++ b/llvm/lib/Target/VE/VEMachineFunctionInfo.h
@@ -0,0 +1,48 @@
+//===- VEMachineFunctionInfo.h - VE Machine Function Info -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares VE-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_VE_VEMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_VE_VEMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class VEMachineFunctionInfo : public MachineFunctionInfo {
+  virtual void anchor();
+
+private:
+  Register GlobalBaseReg;
+
+  /// VarArgsFrameOffset - Frame offset to start of varargs area.
+  int VarArgsFrameOffset;
+
+  /// IsLeafProc - True if the function is a leaf procedure.
+  bool IsLeafProc;
+
+public:
+  VEMachineFunctionInfo()
+      : GlobalBaseReg(), VarArgsFrameOffset(0), IsLeafProc(false) {}
+  explicit VEMachineFunctionInfo(MachineFunction &MF)
+      : GlobalBaseReg(), VarArgsFrameOffset(0), IsLeafProc(false) {}
+
+  Register getGlobalBaseReg() const { return GlobalBaseReg; }
+  void setGlobalBaseReg(Register Reg) { GlobalBaseReg = Reg; }
+
+  int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
+  void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; }
+
+  void setLeafProc(bool rhs) { IsLeafProc = rhs; }
+  bool isLeafProc() const { return IsLeafProc; }
+};
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/VE/VERegisterInfo.cpp b/llvm/lib/Target/VE/VERegisterInfo.cpp
index e1ff614abc202..5783a8df69d24 100644
--- a/llvm/lib/Target/VE/VERegisterInfo.cpp
+++ b/llvm/lib/Target/VE/VERegisterInfo.cpp
@@ -34,12 +34,22 @@ VERegisterInfo::VERegisterInfo() : VEGenRegisterInfo(VE::SX10) {}
 
 const MCPhysReg *
 VERegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
-  return CSR_SaveList;
+  switch (MF->getFunction().getCallingConv()) {
+  default:
+    return CSR_SaveList;
+  case CallingConv::PreserveAll:
+    return CSR_preserve_all_SaveList;
+  }
 }
 
 const uint32_t *VERegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                                      CallingConv::ID CC) const {
-  return CSR_RegMask;
+  switch (CC) {
+  default:
+    return CSR_RegMask;
+  case CallingConv::PreserveAll:
+    return CSR_preserve_all_RegMask;
+  }
 }
 
 const uint32_t *VERegisterInfo::getNoPreservedMask() const {
@@ -48,26 +58,34 @@ const uint32_t *VERegisterInfo::getNoPreservedMask() const {
 
 BitVector VERegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
-  Reserved.set(VE::SX8);  // stack limit
-  Reserved.set(VE::SX9);  // frame pointer
-  Reserved.set(VE::SX10); // link register (return address)
-  Reserved.set(VE::SX11); // stack pointer
-  Reserved.set(VE::SX12); // outer register
-  Reserved.set(VE::SX13); // id register for dynamic linker
-
-  Reserved.set(VE::SX14); // thread pointer
-  Reserved.set(VE::SX15); // global offset table register
-  Reserved.set(VE::SX16); // procedure linkage table register
-  Reserved.set(VE::SX17); // linkage-area register
-
-  // sx18-sx33 are callee-saved registers
-  // sx34-sx63 are temporary registers
+  const Register ReservedRegs[] = {
+      VE::SX8,  // Stack limit
+      VE::SX9,  // Frame pointer
+      VE::SX10, // Link register (return address)
+      VE::SX11, // Stack pointer
+
+      // FIXME: these may not need to be reserved
+      VE::SX12, // Outer register
+      VE::SX13, // ID register for dynamic linker
+
+      VE::SX14, // Thread pointer
+      VE::SX15, // Global offset table register
+      VE::SX16, // Procedure linkage table register
+      VE::SX17, // Linkage-area register
+      // sx18-sx33 are callee-saved registers
+      // sx34-sx63 are temporary registers
+  };
+
+  for (auto R : ReservedRegs)
+    for (MCRegAliasIterator ItAlias(R, this, true); ItAlias.isValid();
+         ++ItAlias)
+      Reserved.set(*ItAlias);
 
   return Reserved;
 }
 
-bool VERegisterInfo::isConstantPhysReg(unsigned PhysReg) const { return false; }
+bool VERegisterInfo::isConstantPhysReg(MCRegister PhysReg) const { return false; }
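The alias-based loop in getReservedRegs above is what keeps VE's overlapping
register file consistent: MCRegAliasIterator is constructed with IncludeSelf
set to true, so reserving one register also reserves every register that
shares bits with it. A sketch of the effect, using register names defined in
VERegisterInfo.td later in this patch:

    // Reserving SX8 marks, at minimum:
    //   SX8              (the register itself)
    //   SW8 / SF8        (its low/high 32-bit halves)
    //   SH8 / SB8        (its 16-bit and 8-bit sub-registers)
    //   Q4               (the 128-bit SX8/SX9 pair)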
 
 const TargetRegisterClass *
 VERegisterInfo::getPointerRegClass(const MachineFunction &MF,
@@ -77,12 +95,12 @@ VERegisterInfo::getPointerRegClass(const MachineFunction &MF,
                                    unsigned Kind) const {
 
 static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II,
                       MachineInstr &MI, const DebugLoc &dl,
-                      unsigned FIOperandNum, int Offset, unsigned FramePtr) {
+                      unsigned FIOperandNum, int Offset, Register FrameReg) {
   // Replace frame index with a frame pointer reference directly.
   // VE has a 32-bit offset field, so no need to expand a target instruction.
   // Directly encode it.
-  MI.getOperand(FIOperandNum).ChangeToRegister(FramePtr, false);
-  MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+  MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
+  MI.getOperand(FIOperandNum + 2).ChangeToImmediate(Offset);
 }
 
 void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
@@ -96,11 +114,11 @@ void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MachineFunction &MF = *MI.getParent()->getParent();
   const VEFrameLowering *TFI = getFrameLowering(MF);
 
-  unsigned FrameReg;
+  Register FrameReg;
   int Offset;
   Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg);
 
-  Offset += MI.getOperand(FIOperandNum + 1).getImm();
+  Offset += MI.getOperand(FIOperandNum + 2).getImm();
 
   replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FrameReg);
 }
diff --git a/llvm/lib/Target/VE/VERegisterInfo.h b/llvm/lib/Target/VE/VERegisterInfo.h
index 9cb475f5e1744..9a32da16bea60 100644
--- a/llvm/lib/Target/VE/VERegisterInfo.h
+++ b/llvm/lib/Target/VE/VERegisterInfo.h
@@ -30,7 +30,7 @@ public:
   const uint32_t *getNoPreservedMask() const override;
   BitVector getReservedRegs(const MachineFunction &MF) const override;
 
-  bool isConstantPhysReg(unsigned PhysReg) const override;
+  bool isConstantPhysReg(MCRegister PhysReg) const override;
 
   const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
                                                 unsigned Kind) const override;
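Note the displacement moving from FIOperandNum + 1 to FIOperandNum + 2 in both
functions above. This is consistent with frame accesses now carrying a
three-part memory operand in which an index register sits between the base and
the displacement; under that assumption (the instruction definitions are not
part of this hunk), the operands line up as:

    // operand FIOperandNum      : base  (rewritten to the frame register)
    // operand FIOperandNum + 1  : index (left untouched here)
    // operand FIOperandNum + 2  : displacement (rewritten to the final offset)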
diff --git a/llvm/lib/Target/VE/VERegisterInfo.td b/llvm/lib/Target/VE/VERegisterInfo.td
index ef5b9c09705a1..29708d35c7307 100644
--- a/llvm/lib/Target/VE/VERegisterInfo.td
+++ b/llvm/lib/Target/VE/VERegisterInfo.td
@@ -10,28 +10,135 @@
 // Declarations that describe the VE register file
 //===----------------------------------------------------------------------===//
 
-class VEReg<bits<7> Enc, string n> : Register<n> {
+class VEReg<bits<7> enc, string n, list<Register> subregs = [],
+            list<string> altNames = [], list<Register> aliases = []>
+    : Register<n, altNames> {
   let HWEncoding{15-7} = 0;
-  let HWEncoding{6-0} = Enc;
+  let HWEncoding{6-0} = enc;
   let Namespace = "VE";
-}
-
-// Registers are identified with 7-bit ID numbers.
-// R - 64-bit integer or floating-point registers
-class R<bits<7> Enc, string n, list<Register> subregs = [],
-        list<Register> aliases = []>: VEReg<Enc, n> {
+  let SubRegs = subregs;
+  let Aliases = aliases;
 }
 
+class VEMiscReg<bits<6> enc, string n>: Register<n> {
+  let HWEncoding{15-6} = 0;
+  let HWEncoding{5-0} = enc;
+  let Namespace = "VE";
+}
+
+let Namespace = "VE" in {
+  def sub_i8  : SubRegIndex<8, 56>;       // Low 8 bit (56..63)
+  def sub_i16 : SubRegIndex<16, 48>;      // Low 16 bit (48..63)
+  def sub_i32 : SubRegIndex<32, 32>;      // Low 32 bit (32..63)
+  def sub_f32 : SubRegIndex<32>;          // High 32 bit (0..31)
+  def sub_even : SubRegIndex<64>;         // High 64 bit (0..63)
+  def sub_odd : SubRegIndex<64, 64>;      // Low 64 bit (64..127)
+  def AsmName : RegAltNameIndex;
+}
+
+//-----------------------------------------------------------------------------
+// Miscellaneous Registers
+//-----------------------------------------------------------------------------
+
+def USRCC : VEMiscReg<0, "usrcc">;    // User clock counter
+def PSW : VEMiscReg<1, "psw">;        // Program status word
+def SAR : VEMiscReg<2, "sar">;        // Store address register
+def PMMR : VEMiscReg<7, "pmmr">;      // Performance monitor mode register
+
+// Performance monitor configuration registers
+foreach I = 0-3 in
+  def PMCR#I : VEMiscReg<!add(8,I), "pmcr"#I>;
+
+// Performance monitor counters
+foreach I = 0-14 in
+  def PMC#I : VEMiscReg<!add(16,I), "pmc"#I>;
+
+// Register classes.
+def MISC : RegisterClass<"VE", [i64], 64,
+                         (add USRCC, PSW, SAR, PMMR,
+                              (sequence "PMCR%u", 0, 3),
+                              (sequence "PMC%u", 0, 14))>;
+
+//-----------------------------------------------------------------------------
+// Instruction Counter Register
+//-----------------------------------------------------------------------------
+
+def IC : VEMiscReg<62, "ic">;
+
+//-----------------------------------------------------------------------------
+// Generic Registers
+//-----------------------------------------------------------------------------
+
+let RegAltNameIndices = [AsmName] in {
+
+// Generic integer registers - 8 bits wide
+foreach I = 0-63 in
+  def SB#I : VEReg<I, "sb"#I, [], ["s"#I]>, DwarfRegNum<[I]>;
+
+// Generic integer registers - 16 bits wide
+let SubRegIndices = [sub_i8] in
+foreach I = 0-63 in
+  def SH#I : VEReg<I, "sh"#I, [!cast<VEReg>("SB"#I)], ["s"#I]>,
+             DwarfRegNum<[I]>;
+
+// Generic integer registers - 32 bits wide
+let SubRegIndices = [sub_i16] in
+foreach I = 0-63 in
+  def SW#I : VEReg<I, "sw"#I, [!cast<VEReg>("SH"#I)], ["s"#I]>,
+             DwarfRegNum<[I]>;
+
+// Generic floating point registers - 32 bits wide
+// NOTE: Mark SF#I as an alias of SW#I temporarily to avoid register
+// allocation problems.
+foreach I = 0-63 in
+  def SF#I : VEReg<I, "sf"#I, [], ["s"#I], [!cast<VEReg>("SW"#I)]>,
+             DwarfRegNum<[I]>;
+
 // Generic integer registers - 64 bits wide
+let SubRegIndices = [sub_i32, sub_f32], CoveredBySubRegs = 1 in
 foreach I = 0-63 in
-  def SX#I : R<I, "S"#I, []>,
-             DwarfRegNum<[I]>;
+  def SX#I : VEReg<I, "s"#I, [!cast<VEReg>("SW"#I), !cast<VEReg>("SF"#I)],
+                   ["s"#I]>, DwarfRegNum<[I]>;
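A consequence of the sub-register indices above that is easy to miss: VE
numbers bits from the most significant end, so a 64-bit SX register overlays
its 32-bit parts as

    bits:  0 ............... 31 | 32 .............. 63
    SX0:   [  SF0  (sub_f32)    |   SW0  (sub_i32)   ]

meaning an f32 value lives in the upper half of a register. This is why the
bitconvert patterns added to VEInstrInfo.td earlier in this diff shift by 32
bits when moving values between f32 and i32.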
+// Aliases of the S* registers used to hold 128-bit values (long doubles).
+// The following foreach represents something like:
+//   def Q0 : VEReg<0, "q0", [SX0, SX1], ["s0"]>;
+//   def Q1 : VEReg<2, "q1", [SX2, SX3], ["s2"]>;
+//   ...
+let SubRegIndices = [sub_even, sub_odd], CoveredBySubRegs = 1 in
+foreach I = 0-31 in
+  def Q#I : VEReg<!shl(I,1), "q"#I,
+                  [!cast<VEReg>("SX"#!shl(I,1)),
+                   !cast<VEReg>("SX"#!add(!shl(I,1),1))],
+                  ["s"#!shl(I,1)]>;
+
+} // RegAltNameIndices = [AsmName]
 
 // Register classes.
 //
 // The register order is defined in terms of the preferred
 // allocation order.
-def I64 : RegisterClass<"VE", [i64], 64,
-                        (sequence "SX%u", 0, 63)>;
+def I8 : RegisterClass<"VE", [i8], 8,
+                       (add (sequence "SB%u", 0, 7),
+                            (sequence "SB%u", 34, 63),
+                            (sequence "SB%u", 8, 33))>;
+def I16 : RegisterClass<"VE", [i16], 16,
+                        (add (sequence "SH%u", 0, 7),
+                             (sequence "SH%u", 34, 63),
+                             (sequence "SH%u", 8, 33))>;
+def I32 : RegisterClass<"VE", [i32], 32,
+                        (add (sequence "SW%u", 0, 7),
+                             (sequence "SW%u", 34, 63),
+                             (sequence "SW%u", 8, 33))>;
+def I64 : RegisterClass<"VE", [i64, f64], 64,
+                        (add (sequence "SX%u", 0, 7),
+                             (sequence "SX%u", 34, 63),
+                             (sequence "SX%u", 8, 33))>;
+def F32 : RegisterClass<"VE", [f32], 32,
+                        (add (sequence "SF%u", 0, 7),
+                             (sequence "SF%u", 34, 63),
+                             (sequence "SF%u", 8, 33))>;
+def F128 : RegisterClass<"VE", [f128], 128,
+                         (add (sequence "Q%u", 0, 3),
+                              (sequence "Q%u", 17, 31),
+                              (sequence "Q%u", 4, 16))>;
diff --git a/llvm/lib/Target/VE/VESubtarget.cpp b/llvm/lib/Target/VE/VESubtarget.cpp
index 861e88cdb5832..a0b78d95e3cf0 100644
--- a/llvm/lib/Target/VE/VESubtarget.cpp
+++ b/llvm/lib/Target/VE/VESubtarget.cpp
@@ -28,7 +28,7 @@ void VESubtarget::anchor() {}
 VESubtarget &VESubtarget::initializeSubtargetDependencies(StringRef CPU,
                                                           StringRef FS) {
   // Determine default and user-specified characteristics
-  std::string CPUName = CPU;
+  std::string CPUName = std::string(CPU);
   if (CPUName.empty())
     CPUName = "ve";
diff --git a/llvm/lib/Target/VE/VESubtarget.h b/llvm/lib/Target/VE/VESubtarget.h
index e9637cc16023b..f3a2c206162e9 100644
--- a/llvm/lib/Target/VE/VESubtarget.h
+++ b/llvm/lib/Target/VE/VESubtarget.h
@@ -42,7 +42,7 @@ public:
   const VEInstrInfo *getInstrInfo() const override { return &InstrInfo; }
 
-  const TargetFrameLowering *getFrameLowering() const override {
+  const VEFrameLowering *getFrameLowering() const override {
     return &FrameLowering;
   }
   const VERegisterInfo *getRegisterInfo() const override {
diff --git a/llvm/lib/Target/VE/VETargetMachine.cpp b/llvm/lib/Target/VE/VETargetMachine.cpp
index 46f5c0dc18051..08b55eebbc98d 100644
--- a/llvm/lib/Target/VE/VETargetMachine.cpp
+++ b/llvm/lib/Target/VE/VETargetMachine.cpp
@@ -10,6 +10,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "VETargetMachine.h"
+#include "TargetInfo/VETargetInfo.h"
 #include "VE.h"
 #include "VETargetTransformInfo.h"
 #include "llvm/CodeGen/Passes.h"
@@ -40,8 +41,8 @@ static std::string computeDataLayout(const Triple &T) {
   // VE supports 32-bit and 64-bit integers in registers
   Ret += "-n32:64";
 
-  // Stack alignment is 64 bits
-  Ret += "-S64";
+  // Stack alignment is 128 bits
+  Ret += "-S128";
 
   return Ret;
 }
@@ -73,7 +74,8 @@ VETargetMachine::VETargetMachine(const Target &T, const Triple &TT,
     : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
                         getEffectiveRelocModel(RM),
                         getEffectiveCodeModel(CM, CodeModel::Small), OL),
-      TLOF(createTLOF()), Subtarget(TT, CPU, FS, *this) {
+      TLOF(createTLOF()),
+      Subtarget(TT, std::string(CPU), std::string(FS), *this) {
   initAsmInfo();
 }
diff --git a/llvm/lib/Target/VE/VETargetMachine.h b/llvm/lib/Target/VE/VETargetMachine.h
index 3191d59ec1c80..041d3b197ec3c 100644
--- a/llvm/lib/Target/VE/VETargetMachine.h
+++ b/llvm/lib/Target/VE/VETargetMachine.h
@@ -50,6 +50,8 @@ public:
   bool isMachineVerifierClean() const override { return false; }
 
   TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+
+  unsigned getSjLjDataSize() const override { return 64; }
 };
 } // namespace llvm
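Two details of the VETargetMachine changes are worth spelling out. First, the
S<n> token in an LLVM data layout string is measured in bits, so the switch
from "-S64" to "-S128" raises the natural stack alignment from 8 to 16 bytes;
the full layout string would end in something like ...-n32:64-S128 (the
components built before "-n32:64" in computeDataLayout are outside this diff).
Second, the new getSjLjDataSize() override widens the data slots used by
setjmp/longjmp-based exception lowering from the usual 32-bit default to 64
bits, presumably to match VE's 64-bit registers and pointers (an inference;
the patch itself does not state the motivation).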