Diffstat (limited to 'llvm'): 31 files changed, 471 insertions, 393 deletions
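Taken together, the IRBuilder and AlignmentFromAssumptions hunks below revert alignment assumptions from the `"align"` operand-bundle encoding back to an explicit ptrtoint/mask/icmp check passed to `llvm.assume` as an `i1`, with the compare optionally handed back through `TheCheck`. Here is a minimal sketch of what the restored `CreateAlignmentAssumption` emits — the module and function names are hypothetical, but the builder call matches the signature shown in this diff:

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("align-demo", Ctx);            // hypothetical module
  M.setDataLayout("e-m:e-i64:64-n32:64"); // assumed 64-bit data layout

  auto *FT = FunctionType::get(Type::getVoidTy(Ctx),
                               {Type::getInt8PtrTy(Ctx)}, false);
  Function *F = Function::Create(FT, Function::ExternalLinkage, "f", M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));

  // After this revert, the builder materializes the check itself:
  //   %ptrint    = ptrtoint i8* %p to i64
  //   %maskedptr = and i64 %ptrint, 31        ; mask = Alignment - 1
  //   %maskcond  = icmp eq i64 %maskedptr, 0
  //   call void @llvm.assume(i1 %maskcond)
  // TheCheck receives %maskcond so callers can reuse the compare.
  Value *TheCheck = nullptr;
  B.CreateAlignmentAssumption(M.getDataLayout(), F->getArg(0),
                              /*Alignment=*/32, /*OffsetValue=*/nullptr,
                              &TheCheck);
  (void)TheCheck; // e.g. branch on the check instead of assuming
  B.CreateRetVoid();

  M.print(outs(), nullptr);
  return 0;
}
```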
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 4552ca016bd76..ffec4ff64ca66 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -782,11 +782,7 @@ public: /// Create an assume intrinsic call that allows the optimizer to /// assume that the provided condition will be true. - /// - /// The optional argument \p OpBundles specifies operand bundles that are - /// added to the call instruction. - CallInst *CreateAssumption(Value *Cond, - ArrayRef<OperandBundleDef> OpBundles = llvm::None); + CallInst *CreateAssumption(Value *Cond); /// Create a call to the experimental.gc.statepoint intrinsic to /// start a new statepoint sequence. @@ -2506,11 +2502,13 @@ public: private: /// Helper function that creates an assume intrinsic call that - /// represents an alignment assumption on the provided pointer \p PtrValue - /// with offset \p OffsetValue and alignment value \p AlignValue. + /// represents an alignment assumption on the provided Ptr, Mask, Type + /// and Offset. It may be sometimes useful to do some other logic + /// based on this alignment check, thus it can be stored into 'TheCheck'. CallInst *CreateAlignmentAssumptionHelper(const DataLayout &DL, - Value *PtrValue, Value *AlignValue, - Value *OffsetValue); + Value *PtrValue, Value *Mask, + Type *IntPtrTy, Value *OffsetValue, + Value **TheCheck); public: /// Create an assume intrinsic call that represents an alignment @@ -2519,9 +2517,13 @@ public: /// An optional offset can be provided, and if it is provided, the offset /// must be subtracted from the provided pointer to get the pointer with the /// specified alignment. + /// + /// It may be sometimes useful to do some other logic + /// based on this alignment check, thus it can be stored into 'TheCheck'. CallInst *CreateAlignmentAssumption(const DataLayout &DL, Value *PtrValue, unsigned Alignment, - Value *OffsetValue = nullptr); + Value *OffsetValue = nullptr, + Value **TheCheck = nullptr); /// Create an assume intrinsic call that represents an alignment /// assumption on the provided pointer. @@ -2530,11 +2532,15 @@ public: /// must be subtracted from the provided pointer to get the pointer with the /// specified alignment. /// + /// It may be sometimes useful to do some other logic + /// based on this alignment check, thus it can be stored into 'TheCheck'. + /// /// This overload handles the condition where the Alignment is dependent /// on an existing value rather than a static value. 
CallInst *CreateAlignmentAssumption(const DataLayout &DL, Value *PtrValue, Value *Alignment, - Value *OffsetValue = nullptr); + Value *OffsetValue = nullptr, + Value **TheCheck = nullptr); }; /// This provides a uniform API for creating instructions and inserting diff --git a/llvm/include/llvm/Support/RISCVTargetParser.def b/llvm/include/llvm/Support/RISCVTargetParser.def new file mode 100644 index 0000000000000..28de6cd40132e --- /dev/null +++ b/llvm/include/llvm/Support/RISCVTargetParser.def @@ -0,0 +1,13 @@ +#ifndef PROC +#define PROC(ENUM, NAME, FEATURES, DEFAULT_MARCH) +#endif + +PROC(INVALID, {"invalid"}, FK_INVALID, {""}) +PROC(GENERIC_RV32, {"generic-rv32"}, FK_NONE, {""}) +PROC(GENERIC_RV64, {"generic-rv64"}, FK_64BIT, {""}) +PROC(ROCKET_RV32, {"rocket-rv32"}, FK_NONE, {""}) +PROC(ROCKET_RV64, {"rocket-rv64"}, FK_64BIT, {""}) +PROC(SIFIVE_E31, {"sifive-e31"}, FK_NONE, {"rv32imac"}) +PROC(SIFIVE_U54, {"sifive-u54"}, FK_64BIT, {"rv64gc"}) + +#undef PROC diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h index a0bd88c153b6e..f521d8f836b4b 100644 --- a/llvm/include/llvm/Support/TargetParser.h +++ b/llvm/include/llvm/Support/TargetParser.h @@ -130,6 +130,32 @@ IsaVersion getIsaVersion(StringRef GPU); } // namespace AMDGPU +namespace RISCV { + +enum CPUKind : unsigned { +#define PROC(ENUM, NAME, FEATURES, DEFAULT_MARCH) CK_##ENUM, +#include "RISCVTargetParser.def" +}; + +enum FeatureKind : unsigned { + FK_INVALID = 0, + FK_NONE = 1, + FK_STDEXTM = 1 << 2, + FK_STDEXTA = 1 << 3, + FK_STDEXTF = 1 << 4, + FK_STDEXTD = 1 << 5, + FK_STDEXTC = 1 << 6, + FK_64BIT = 1 << 7, +}; + +bool checkCPUKind(CPUKind Kind, bool IsRV64); +CPUKind parseCPUKind(StringRef CPU); +StringRef getMArchFromMcpu(StringRef CPU); +void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values, bool IsRV64); +bool getCPUFeaturesExceptStdExt(CPUKind Kind, std::vector<StringRef> &Features); + +} // namespace RISCV + } // namespace llvm #endif diff --git a/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h b/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h index 10b6e1c6a21b6..be119b8ab8552 100644 --- a/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h +++ b/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h @@ -37,9 +37,9 @@ struct AlignmentFromAssumptionsPass ScalarEvolution *SE = nullptr; DominatorTree *DT = nullptr; - bool extractAlignmentInfo(CallInst *I, unsigned Idx, Value *&AAPtr, - const SCEV *&AlignSCEV, const SCEV *&OffSCEV); - bool processAssumption(CallInst *I, unsigned Idx); + bool extractAlignmentInfo(CallInst *I, Value *&AAPtr, const SCEV *&AlignSCEV, + const SCEV *&OffSCEV); + bool processAssumption(CallInst *I); }; } diff --git a/llvm/lib/Analysis/AssumeBundleQueries.cpp b/llvm/lib/Analysis/AssumeBundleQueries.cpp index 05fe05a0bd851..972d0d3ea7f2b 100644 --- a/llvm/lib/Analysis/AssumeBundleQueries.cpp +++ b/llvm/lib/Analysis/AssumeBundleQueries.cpp @@ -108,17 +108,10 @@ llvm::getKnowledgeFromBundle(CallInst &Assume, Result.AttrKind = Attribute::getAttrKindFromName(BOI.Tag->getKey()); if (bundleHasArgument(BOI, ABA_WasOn)) Result.WasOn = getValueFromBundleOpInfo(Assume, BOI, ABA_WasOn); - auto GetArgOr1 = [&](unsigned Idx) -> unsigned { - if (auto *ConstInt = dyn_cast<ConstantInt>( - getValueFromBundleOpInfo(Assume, BOI, ABA_Argument + Idx))) - return ConstInt->getZExtValue(); - return 1; - }; if (BOI.End - BOI.Begin > ABA_Argument) - Result.ArgValue = GetArgOr1(0); - if (Result.AttrKind == 
Attribute::Alignment) - if (BOI.End - BOI.Begin > ABA_Argument + 1) - Result.ArgValue = MinAlign(Result.ArgValue, GetArgOr1(1)); + Result.ArgValue = + cast<ConstantInt>(getValueFromBundleOpInfo(Assume, BOI, ABA_Argument)) + ->getZExtValue(); return Result; } diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 8c66decaaf58d..6feffcbb98e1f 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -342,8 +342,12 @@ Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy, // pointers legally). if (C->isNullValue() && !DestTy->isX86_MMXTy()) return Constant::getNullValue(DestTy); - if (C->isAllOnesValue() && !DestTy->isX86_MMXTy() && - !DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types! + if (C->isAllOnesValue() && + (DestTy->isIntegerTy() || DestTy->isFloatingPointTy() || + DestTy->isVectorTy()) && + !DestTy->isX86_MMXTy() && !DestTy->isPtrOrPtrVectorTy()) + // Get ones when the input is trivial, but + // only for supported types inside getAllOnesValue. return Constant::getAllOnesValue(DestTy); // If the type sizes are the same and a cast is legal, just directly diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 0975a65d183e4..d3bdf9d6aafd0 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4118,15 +4118,9 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, if (TrueVal == FalseVal) return TrueVal; - // If the true or false value is undef, we can fold to the other value as - // long as the other value isn't poison. - // select ?, undef, X -> X - if (isa<UndefValue>(TrueVal) && - isGuaranteedNotToBeUndefOrPoison(FalseVal, Q.CxtI, Q.DT)) + if (isa<UndefValue>(TrueVal)) // select ?, undef, X -> X return FalseVal; - // select ?, X, undef -> X - if (isa<UndefValue>(FalseVal) && - isGuaranteedNotToBeUndefOrPoison(TrueVal, Q.CxtI, Q.DT)) + if (isa<UndefValue>(FalseVal)) // select ?, X, undef -> X return TrueVal; // Deal with partial undef vector constants: select ?, VecC, VecC' --> VecC'' @@ -4146,11 +4140,9 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, // one element is undef, choose the defined element as the safe result. if (TEltC == FEltC) NewC.push_back(TEltC); - else if (isa<UndefValue>(TEltC) && - isGuaranteedNotToBeUndefOrPoison(FEltC)) + else if (isa<UndefValue>(TEltC)) NewC.push_back(FEltC); - else if (isa<UndefValue>(FEltC) && - isGuaranteedNotToBeUndefOrPoison(TEltC)) + else if (isa<UndefValue>(FEltC)) NewC.push_back(TEltC); else break; diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 48c686b732608..3c96b3f20461e 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -3317,10 +3317,7 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, } // Add the total offset from all the GEP indices to the base. 
- auto *GEPExpr = getAddExpr(BaseExpr, TotalOffset, Wrap); - assert(BaseExpr->getType() == GEPExpr->getType() && - "GEP should not change type mid-flight."); - return GEPExpr; + return getAddExpr(BaseExpr, TotalOffset, Wrap); } std::tuple<SCEV *, FoldingSetNodeID, void *> diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index de336abe607a5..615ff4b8789c0 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -70,7 +70,6 @@ STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions"); STATISTIC(NumCommuted , "Number of instructions commuted to coalesce"); STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted"); STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address"); -STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk"); STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); @@ -109,10 +108,6 @@ class TwoAddressInstructionPass : public MachineFunctionPass { // Set of already processed instructions in the current block. SmallPtrSet<MachineInstr*, 8> Processed; - // Set of instructions converted to three-address by target and then sunk - // down current basic block. - SmallPtrSet<MachineInstr*, 8> SunkInstrs; - // A map from virtual registers to physical registers which are likely targets // to be coalesced to due to copies from physical registers to virtual // registers. e.g. v1024 = move r0. @@ -123,9 +118,6 @@ class TwoAddressInstructionPass : public MachineFunctionPass { // registers. e.g. r1 = move v1024. DenseMap<unsigned, unsigned> DstRegMap; - bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg, - MachineBasicBlock::iterator OldPos); - bool isRevCopyChain(unsigned FromReg, unsigned ToReg, int Maxlen); bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef); @@ -209,136 +201,6 @@ INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE, static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS); -/// A two-address instruction has been converted to a three-address instruction -/// to avoid clobbering a register. Try to sink it past the instruction that -/// would kill the above mentioned register to reduce register pressure. -bool TwoAddressInstructionPass:: -sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, - MachineBasicBlock::iterator OldPos) { - // FIXME: Shouldn't we be trying to do this before we three-addressify the - // instruction? After this transformation is done, we no longer need - // the instruction to be in three-address form. - - // Check if it's safe to move this instruction. - bool SeenStore = true; // Be conservative. - if (!MI->isSafeToMove(AA, SeenStore)) - return false; - - unsigned DefReg = 0; - SmallSet<unsigned, 4> UseRegs; - - for (const MachineOperand &MO : MI->operands()) { - if (!MO.isReg()) - continue; - Register MOReg = MO.getReg(); - if (!MOReg) - continue; - if (MO.isUse() && MOReg != SavedReg) - UseRegs.insert(MO.getReg()); - if (!MO.isDef()) - continue; - if (MO.isImplicit()) - // Don't try to move it if it implicitly defines a register. - return false; - if (DefReg) - // For now, don't move any instructions that define multiple registers. - return false; - DefReg = MO.getReg(); - } - - // Find the instruction that kills SavedReg. 
- MachineInstr *KillMI = nullptr; - if (LIS) { - LiveInterval &LI = LIS->getInterval(SavedReg); - assert(LI.end() != LI.begin() && - "Reg should not have empty live interval."); - - SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot(); - LiveInterval::const_iterator I = LI.find(MBBEndIdx); - if (I != LI.end() && I->start < MBBEndIdx) - return false; - - --I; - KillMI = LIS->getInstructionFromIndex(I->end); - } - if (!KillMI) { - for (MachineOperand &UseMO : MRI->use_nodbg_operands(SavedReg)) { - if (!UseMO.isKill()) - continue; - KillMI = UseMO.getParent(); - break; - } - } - - // If we find the instruction that kills SavedReg, and it is in an - // appropriate location, we can try to sink the current instruction - // past it. - if (!KillMI || KillMI->getParent() != MBB || KillMI == MI || - MachineBasicBlock::iterator(KillMI) == OldPos || KillMI->isTerminator()) - return false; - - // If any of the definitions are used by another instruction between the - // position and the kill use, then it's not safe to sink it. - // - // FIXME: This can be sped up if there is an easy way to query whether an - // instruction is before or after another instruction. Then we can use - // MachineRegisterInfo def / use instead. - MachineOperand *KillMO = nullptr; - MachineBasicBlock::iterator KillPos = KillMI; - ++KillPos; - - unsigned NumVisited = 0; - for (MachineInstr &OtherMI : make_range(std::next(OldPos), KillPos)) { - // Debug instructions cannot be counted against the limit. - if (OtherMI.isDebugInstr()) - continue; - if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost. - return false; - ++NumVisited; - for (unsigned i = 0, e = OtherMI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = OtherMI.getOperand(i); - if (!MO.isReg()) - continue; - Register MOReg = MO.getReg(); - if (!MOReg) - continue; - if (DefReg == MOReg) - return false; - - if (MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS))) { - if (&OtherMI == KillMI && MOReg == SavedReg) - // Save the operand that kills the register. We want to unset the kill - // marker if we can sink MI past it. - KillMO = &MO; - else if (UseRegs.count(MOReg)) - // One of the uses is killed before the destination. - return false; - } - } - } - assert(KillMO && "Didn't find kill"); - - if (!LIS) { - // Update kill and LV information. - KillMO->setIsKill(false); - KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI); - KillMO->setIsKill(true); - - if (LV) - LV->replaceKillInstruction(SavedReg, *KillMI, *MI); - } - - // Move instruction to its destination. - MBB->remove(MI); - MBB->insert(KillPos, MI); - - if (LIS) - LIS->handleMove(*MI); - - ++Num3AddrSunk; - return true; -} - /// Return the MachineInstr* if it is the single def of the Reg in current BB. static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB, const MachineRegisterInfo *MRI) { @@ -740,26 +602,15 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); - bool Sunk = false; if (LIS) LIS->ReplaceMachineInstrInMaps(*mi, *NewMI); - if (NewMI->findRegisterUseOperand(RegB, false, TRI)) - // FIXME: Temporary workaround. If the new instruction doesn't - // uses RegB, convertToThreeAddress must have created more - // then one instruction. - Sunk = sink3AddrInstruction(NewMI, RegB, mi); - MBB->erase(mi); // Nuke the old inst. 
- if (!Sunk) { - DistanceMap.insert(std::make_pair(NewMI, Dist)); - mi = NewMI; - nmi = std::next(mi); - } - else - SunkInstrs.insert(NewMI); + DistanceMap.insert(std::make_pair(NewMI, Dist)); + mi = NewMI; + nmi = std::next(mi); // Update source and destination register maps. SrcRegMap.erase(RegA); @@ -1700,13 +1551,11 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { SrcRegMap.clear(); DstRegMap.clear(); Processed.clear(); - SunkInstrs.clear(); for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end(); mi != me; ) { MachineBasicBlock::iterator nmi = std::next(mi); - // Don't revisit an instruction previously converted by target. It may - // contain undef register operands (%noreg), which are not handled. - if (mi->isDebugInstr() || SunkInstrs.count(&*mi)) { + // Skip debug instructions. + if (mi->isDebugInstr()) { mi = nmi; continue; } diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index f02246cda7fc6..f3c3e9ad9f696 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -779,30 +779,10 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond, if (isa<UndefValue>(V1)) return V1; return V2; } - + if (isa<UndefValue>(V1)) return V2; + if (isa<UndefValue>(V2)) return V1; if (V1 == V2) return V1; - // If the true or false value is undef, we can fold to the other value as - // long as the other value isn't poison. - auto NotPoison = [](Constant *C) { - // TODO: We can analyze ConstExpr by opcode to determine if there is any - // possibility of poison. - if (isa<ConstantExpr>(C)) - return false; - - if (isa<ConstantInt>(C) || isa<GlobalVariable>(C) || isa<ConstantFP>(C) || - isa<ConstantPointerNull>(C) || isa<Function>(C)) - return true; - - if (C->getType()->isVectorTy()) - return !C->containsUndefElement() && !C->containsConstantExpression(); - - // TODO: Recursively analyze aggregates or other constants. 
- return false; - }; - if (isa<UndefValue>(V1) && NotPoison(V2)) return V2; - if (isa<UndefValue>(V2) && NotPoison(V1)) return V1; - if (ConstantExpr *TrueVal = dyn_cast<ConstantExpr>(V1)) { if (TrueVal->getOpcode() == Instruction::Select) if (TrueVal->getOperand(0) == Cond) diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index b87dfe1c8df65..1fffce015f707 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -71,9 +71,8 @@ Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) { static CallInst *createCallHelper(Function *Callee, ArrayRef<Value *> Ops, IRBuilderBase *Builder, const Twine &Name = "", - Instruction *FMFSource = nullptr, - ArrayRef<OperandBundleDef> OpBundles = {}) { - CallInst *CI = Builder->CreateCall(Callee, Ops, OpBundles, Name); + Instruction *FMFSource = nullptr) { + CallInst *CI = Builder->CreateCall(Callee, Ops, Name); if (FMFSource) CI->copyFastMathFlags(FMFSource); return CI; @@ -450,16 +449,14 @@ CallInst *IRBuilderBase::CreateInvariantStart(Value *Ptr, ConstantInt *Size) { return createCallHelper(TheFn, Ops, this); } -CallInst * -IRBuilderBase::CreateAssumption(Value *Cond, - ArrayRef<OperandBundleDef> OpBundles) { +CallInst *IRBuilderBase::CreateAssumption(Value *Cond) { assert(Cond->getType() == getInt1Ty() && "an assumption condition must be of type i1"); Value *Ops[] = { Cond }; Module *M = BB->getParent()->getParent(); Function *FnAssume = Intrinsic::getDeclaration(M, Intrinsic::assume); - return createCallHelper(FnAssume, Ops, this, "", nullptr, OpBundles); + return createCallHelper(FnAssume, Ops, this); } /// Create a call to a Masked Load intrinsic. @@ -1110,37 +1107,63 @@ Value *IRBuilderBase::CreatePreserveStructAccessIndex( return Fn; } -CallInst *IRBuilderBase::CreateAlignmentAssumptionHelper(const DataLayout &DL, - Value *PtrValue, - Value *AlignValue, - Value *OffsetValue) { - SmallVector<Value *, 4> Vals({PtrValue, AlignValue}); - if (OffsetValue) - Vals.push_back(OffsetValue); - OperandBundleDefT<Value *> AlignOpB("align", Vals); - return CreateAssumption(ConstantInt::getTrue(getContext()), {AlignOpB}); +CallInst *IRBuilderBase::CreateAlignmentAssumptionHelper( + const DataLayout &DL, Value *PtrValue, Value *Mask, Type *IntPtrTy, + Value *OffsetValue, Value **TheCheck) { + Value *PtrIntValue = CreatePtrToInt(PtrValue, IntPtrTy, "ptrint"); + + if (OffsetValue) { + bool IsOffsetZero = false; + if (const auto *CI = dyn_cast<ConstantInt>(OffsetValue)) + IsOffsetZero = CI->isZero(); + + if (!IsOffsetZero) { + if (OffsetValue->getType() != IntPtrTy) + OffsetValue = CreateIntCast(OffsetValue, IntPtrTy, /*isSigned*/ true, + "offsetcast"); + PtrIntValue = CreateSub(PtrIntValue, OffsetValue, "offsetptr"); + } + } + + Value *Zero = ConstantInt::get(IntPtrTy, 0); + Value *MaskedPtr = CreateAnd(PtrIntValue, Mask, "maskedptr"); + Value *InvCond = CreateICmpEQ(MaskedPtr, Zero, "maskcond"); + if (TheCheck) + *TheCheck = InvCond; + + return CreateAssumption(InvCond); } -CallInst *IRBuilderBase::CreateAlignmentAssumption(const DataLayout &DL, - Value *PtrValue, - unsigned Alignment, - Value *OffsetValue) { +CallInst *IRBuilderBase::CreateAlignmentAssumption( + const DataLayout &DL, Value *PtrValue, unsigned Alignment, + Value *OffsetValue, Value **TheCheck) { assert(isa<PointerType>(PtrValue->getType()) && "trying to create an alignment assumption on a non-pointer?"); assert(Alignment != 0 && "Invalid Alignment"); auto *PtrTy = cast<PointerType>(PtrValue->getType()); Type *IntPtrTy = getIntPtrTy(DL, PtrTy->getAddressSpace()); 
- Value *AlignValue = ConstantInt::get(IntPtrTy, Alignment); - return CreateAlignmentAssumptionHelper(DL, PtrValue, AlignValue, OffsetValue); + + Value *Mask = ConstantInt::get(IntPtrTy, Alignment - 1); + return CreateAlignmentAssumptionHelper(DL, PtrValue, Mask, IntPtrTy, + OffsetValue, TheCheck); } -CallInst *IRBuilderBase::CreateAlignmentAssumption(const DataLayout &DL, - Value *PtrValue, - Value *Alignment, - Value *OffsetValue) { +CallInst *IRBuilderBase::CreateAlignmentAssumption( + const DataLayout &DL, Value *PtrValue, Value *Alignment, + Value *OffsetValue, Value **TheCheck) { assert(isa<PointerType>(PtrValue->getType()) && "trying to create an alignment assumption on a non-pointer?"); - return CreateAlignmentAssumptionHelper(DL, PtrValue, Alignment, OffsetValue); + auto *PtrTy = cast<PointerType>(PtrValue->getType()); + Type *IntPtrTy = getIntPtrTy(DL, PtrTy->getAddressSpace()); + + if (Alignment->getType() != IntPtrTy) + Alignment = CreateIntCast(Alignment, IntPtrTy, /*isSigned*/ false, + "alignmentcast"); + + Value *Mask = CreateSub(Alignment, ConstantInt::get(IntPtrTy, 1), "mask"); + + return CreateAlignmentAssumptionHelper(DL, PtrValue, Mask, IntPtrTy, + OffsetValue, TheCheck); } IRBuilderDefaultInserter::~IRBuilderDefaultInserter() {} diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 6df1072925f92..c518ae87ea9b0 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4449,32 +4449,21 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { Assert(Elem.Tag->getKey() == "ignore" || Attribute::isExistingAttribute(Elem.Tag->getKey()), "tags must be valid attribute names"); + Assert(Elem.End - Elem.Begin <= 2, "to many arguments"); Attribute::AttrKind Kind = Attribute::getAttrKindFromName(Elem.Tag->getKey()); - unsigned ArgCount = Elem.End - Elem.Begin; - if (Kind == Attribute::Alignment) { - Assert(ArgCount <= 3 && ArgCount >= 2, - "alignment assumptions should have 2 or 3 arguments"); - Assert(Call.getOperand(Elem.Begin)->getType()->isPointerTy(), - "first argument should be a pointer"); - Assert(Call.getOperand(Elem.Begin + 1)->getType()->isIntegerTy(), - "second argument should be an integer"); - if (ArgCount == 3) - Assert(Call.getOperand(Elem.Begin + 2)->getType()->isIntegerTy(), - "third argument should be an integer if present"); - return; - } - Assert(ArgCount <= 2, "to many arguments"); if (Kind == Attribute::None) break; if (Attribute::doesAttrKindHaveArgument(Kind)) { - Assert(ArgCount == 2, "this attribute should have 2 arguments"); + Assert(Elem.End - Elem.Begin == 2, + "this attribute should have 2 arguments"); Assert(isa<ConstantInt>(Call.getOperand(Elem.Begin + 1)), "the second argument should be a constant integral value"); } else if (isFuncOnlyAttr(Kind)) { - Assert((ArgCount) == 0, "this attribute has no argument"); + Assert((Elem.End - Elem.Begin) == 0, "this attribute has no argument"); } else if (!isFuncOrArgAttr(Kind)) { - Assert((ArgCount) == 1, "this attribute should have one argument"); + Assert((Elem.End - Elem.Begin) == 1, + "this attribute should have one argument"); } } break; diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 58c22b2ccef26..e2aaeaae03b09 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -812,9 +812,6 @@ private: const StructInitializer &Initializer); // User-defined types (structs, unions): - bool emitStructValue(const StructInfo &Structure, - const StructInitializer &Initializer, - size_t 
InitialOffset = 0, size_t InitialField = 0); bool emitStructValues(const StructInfo &Structure); bool addStructField(StringRef Name, const StructInfo &Structure); bool parseDirectiveStructValue(const StructInfo &Structure, diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp index 3f3f79b0f4ff7..ad7a50d13bb7e 100644 --- a/llvm/lib/Object/RelocationResolver.cpp +++ b/llvm/lib/Object/RelocationResolver.cpp @@ -62,6 +62,8 @@ static bool supportsAArch64(uint64_t Type) { switch (Type) { case ELF::R_AARCH64_ABS32: case ELF::R_AARCH64_ABS64: + case ELF::R_AARCH64_PREL32: + case ELF::R_AARCH64_PREL64: return true; default: return false; @@ -74,6 +76,10 @@ static uint64_t resolveAArch64(RelocationRef R, uint64_t S, uint64_t A) { return (S + getELFAddend(R)) & 0xFFFFFFFF; case ELF::R_AARCH64_ABS64: return S + getELFAddend(R); + case ELF::R_AARCH64_PREL32: + return (S + getELFAddend(R) - R.getOffset()) & 0xFFFFFFFF; + case ELF::R_AARCH64_PREL64: + return S + getELFAddend(R) - R.getOffset(); default: llvm_unreachable("Invalid relocation type"); } @@ -152,6 +158,8 @@ static bool supportsPPC64(uint64_t Type) { switch (Type) { case ELF::R_PPC64_ADDR32: case ELF::R_PPC64_ADDR64: + case ELF::R_PPC64_REL32: + case ELF::R_PPC64_REL64: return true; default: return false; @@ -164,6 +172,10 @@ static uint64_t resolvePPC64(RelocationRef R, uint64_t S, uint64_t A) { return (S + getELFAddend(R)) & 0xFFFFFFFF; case ELF::R_PPC64_ADDR64: return S + getELFAddend(R); + case ELF::R_PPC64_REL32: + return (S + getELFAddend(R) - R.getOffset()) & 0xFFFFFFFF; + case ELF::R_PPC64_REL64: + return S + getELFAddend(R) - R.getOffset(); default: llvm_unreachable("Invalid relocation type"); } @@ -259,12 +271,22 @@ static uint64_t resolveX86(RelocationRef R, uint64_t S, uint64_t A) { } static bool supportsPPC32(uint64_t Type) { - return Type == ELF::R_PPC_ADDR32; + switch (Type) { + case ELF::R_PPC_ADDR32: + case ELF::R_PPC_REL32: + return true; + default: + return false; + } } static uint64_t resolvePPC32(RelocationRef R, uint64_t S, uint64_t A) { - if (R.getType() == ELF::R_PPC_ADDR32) + switch (R.getType()) { + case ELF::R_PPC_ADDR32: return (S + getELFAddend(R)) & 0xFFFFFFFF; + case ELF::R_PPC_REL32: + return (S + getELFAddend(R) - R.getOffset()) & 0xFFFFFFFF; + } llvm_unreachable("Invalid relocation type"); } diff --git a/llvm/lib/Support/TargetParser.cpp b/llvm/lib/Support/TargetParser.cpp index be9b541237c74..031384ebaa91c 100644 --- a/llvm/lib/Support/TargetParser.cpp +++ b/llvm/lib/Support/TargetParser.cpp @@ -11,11 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/TargetParser.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/ARMBuildAttributes.h" using namespace llvm; using namespace AMDGPU; @@ -208,3 +209,64 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) { default: return {0, 0, 0}; } } + +namespace llvm { +namespace RISCV { + +struct CPUInfo { + StringLiteral Name; + CPUKind Kind; + unsigned Features; + StringLiteral DefaultMarch; + bool is64Bit() const { return (Features & FK_64BIT); } +}; + +constexpr CPUInfo RISCVCPUInfo[] = { +#define PROC(ENUM, NAME, FEATURES, DEFAULT_MARCH) \ + {NAME, CK_##ENUM, FEATURES, DEFAULT_MARCH}, +#include "llvm/Support/RISCVTargetParser.def" +}; + +bool checkCPUKind(CPUKind Kind, bool IsRV64) { + if (Kind == CK_INVALID) + 
return false; + return RISCVCPUInfo[static_cast<unsigned>(Kind)].is64Bit() == IsRV64; +} + +CPUKind parseCPUKind(StringRef CPU) { + return llvm::StringSwitch<CPUKind>(CPU) +#define PROC(ENUM, NAME, FEATURES, DEFAULT_MARCH) .Case(NAME, CK_##ENUM) +#include "llvm/Support/RISCVTargetParser.def" + .Default(CK_INVALID); +} + +StringRef getMArchFromMcpu(StringRef CPU) { + CPUKind Kind = parseCPUKind(CPU); + return RISCVCPUInfo[static_cast<unsigned>(Kind)].DefaultMarch; +} + +void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values, bool IsRV64) { + for (const auto &C : RISCVCPUInfo) { + if (C.Kind != CK_INVALID && IsRV64 == C.is64Bit()) + Values.emplace_back(C.Name); + } +} + +// Get all features except standard extension feature +bool getCPUFeaturesExceptStdExt(CPUKind Kind, + std::vector<StringRef> &Features) { + unsigned CPUFeatures = RISCVCPUInfo[static_cast<unsigned>(Kind)].Features; + + if (CPUFeatures == FK_INVALID) + return false; + + if (CPUFeatures & FK_64BIT) + Features.push_back("+64bit"); + else + Features.push_back("-64bit"); + + return true; +} + +} // namespace RISCV +} // namespace llvm diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index bd9174c1973dc..2ee394e9259d4 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1466,11 +1466,10 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, .addImm(0) .addImm(32 - Log2(MaxAlign)) .addImm(31); - BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), + BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBFC8 : PPC::SUBFC), SPReg) - .addReg(FPReg) - .addReg(SPReg) - .addReg(ScratchReg); + .addReg(ScratchReg) + .addReg(SPReg); } // Probe residual part. if (NegResidualSize) { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index ddfbd04e1ebc5..11454841cab7d 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -11950,18 +11950,34 @@ PPCTargetLowering::emitProbedAlloca(MachineInstr &MI, Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC); Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC); - - // Get the canonical FinalStackPtr like what - // PPCRegisterInfo::lowerDynamicAlloc does. - BuildMI(*MBB, {MI}, DL, - TII->get(isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 - : PPC::PREPARE_PROBED_ALLOCA_32), - FramePointer) - .addDef(FinalStackPtr) + Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC); + + // Since value of NegSizeReg might be realigned in prologepilog, insert a + // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and + // NegSize. + unsigned ProbeOpc; + if (!MRI.hasOneNonDBGUse(NegSizeReg)) + ProbeOpc = + isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32; + else + // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg + // and NegSizeReg will be allocated in the same phyreg to avoid + // redundant copy when NegSizeReg has only one use which is current MI and + // will be replaced by PREPARE_PROBED_ALLOCA then. + ProbeOpc = isPPC64 ? 
PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64 + : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32; + BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer) + .addDef(ActualNegSizeReg) .addReg(NegSizeReg) .add(MI.getOperand(2)) .add(MI.getOperand(3)); + // Calculate final stack pointer, which equals to SP + ActualNegSize. + BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), + FinalStackPtr) + .addReg(SPReg) + .addReg(ActualNegSizeReg); + // Materialize a scratch register for update. int64_t NegProbeSize = -(int64_t)ProbeSize; assert(isInt<32>(NegProbeSize) && "Unhandled probe size!"); @@ -11982,7 +11998,7 @@ PPCTargetLowering::emitProbedAlloca(MachineInstr &MI, // Probing leading residual part. Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC); BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div) - .addReg(NegSizeReg) + .addReg(ActualNegSizeReg) .addReg(ScratchReg); Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC); BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul) @@ -11991,7 +12007,7 @@ PPCTargetLowering::emitProbedAlloca(MachineInstr &MI, Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC); BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod) .addReg(Mul) - .addReg(NegSizeReg); + .addReg(ActualNegSizeReg); BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg) .addReg(FramePointer) .addReg(SPReg) diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 1c457d4170d54..6956c40a70be5 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -431,9 +431,14 @@ def PROBED_ALLOCA_64 : PPCCustomInserterPseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi), "#PROBED_ALLOCA_64", [(set i64:$result, (PPCprobedalloca i64:$negsize, iaddr:$fpsi))]>; -def PREPARE_PROBED_ALLOCA_64 : PPCEmitTimePseudo<(outs g8rc:$fp, - g8rc:$sp), +def PREPARE_PROBED_ALLOCA_64 : PPCEmitTimePseudo<(outs + g8rc:$fp, g8rc:$actual_negsize), (ins g8rc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_64", []>; +def PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64 : PPCEmitTimePseudo<(outs + g8rc:$fp, g8rc:$actual_negsize), + (ins g8rc:$negsize, memri:$fpsi), + "#PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64", []>, + RegConstraint<"$actual_negsize = $negsize">; def PROBED_STACKALLOC_64 : PPCEmitTimePseudo<(outs g8rc:$scratch, g8rc:$temp), (ins i64imm:$stacksize), "#PROBED_STACKALLOC_64", []>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 673ab63039cf7..fedbf592af39a 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1406,9 +1406,14 @@ def PROBED_ALLOCA_32 : PPCCustomInserterPseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#PROBED_ALLOCA_32", [(set i32:$result, (PPCprobedalloca i32:$negsize, iaddr:$fpsi))]>; -def PREPARE_PROBED_ALLOCA_32 : PPCEmitTimePseudo<(outs gprc:$fp, - gprc:$sp), +def PREPARE_PROBED_ALLOCA_32 : PPCEmitTimePseudo<(outs + gprc:$fp, gprc:$actual_negsize), (ins gprc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_32", []>; +def PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32 : PPCEmitTimePseudo<(outs + gprc:$fp, gprc:$actual_negsize), + (ins gprc:$negsize, memri:$fpsi), + "#PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32", []>, + RegConstraint<"$actual_negsize = $negsize">; def PROBED_STACKALLOC_32 : PPCEmitTimePseudo<(outs gprc:$scratch, gprc:$temp), (ins 
i64imm:$stacksize), "#PROBED_STACKALLOC_32", []>; diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 35f5e1fbebcdf..ed8948a639728 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -624,21 +624,30 @@ void PPCRegisterInfo::lowerPrepareProbedAlloca( bool LP64 = TM.isPPC64(); DebugLoc dl = MI.getDebugLoc(); Register FramePointer = MI.getOperand(0).getReg(); - Register FinalStackPtr = MI.getOperand(1).getReg(); + const Register ActualNegSizeReg = MI.getOperand(1).getReg(); bool KillNegSizeReg = MI.getOperand(2).isKill(); Register NegSizeReg = MI.getOperand(2).getReg(); - prepareDynamicAlloca(II, NegSizeReg, KillNegSizeReg, FramePointer); - if (LP64) { - BuildMI(MBB, II, dl, TII.get(PPC::ADD8), FinalStackPtr) - .addReg(PPC::X1) - .addReg(NegSizeReg, getKillRegState(KillNegSizeReg)); - - } else { - BuildMI(MBB, II, dl, TII.get(PPC::ADD4), FinalStackPtr) - .addReg(PPC::R1) - .addReg(NegSizeReg, getKillRegState(KillNegSizeReg)); + const MCInstrDesc &CopyInst = TII.get(LP64 ? PPC::OR8 : PPC::OR); + // RegAllocator might allocate FramePointer and NegSizeReg in the same phyreg. + if (FramePointer == NegSizeReg) { + assert(KillNegSizeReg && "FramePointer is a def and NegSizeReg is an use, " + "NegSizeReg should be killed"); + // FramePointer is clobbered earlier than the use of NegSizeReg in + // prepareDynamicAlloca, save NegSizeReg in ActualNegSizeReg to avoid + // misuse. + BuildMI(MBB, II, dl, CopyInst, ActualNegSizeReg) + .addReg(NegSizeReg) + .addReg(NegSizeReg); + NegSizeReg = ActualNegSizeReg; + KillNegSizeReg = false; } - + prepareDynamicAlloca(II, NegSizeReg, KillNegSizeReg, FramePointer); + // NegSizeReg might be updated in prepareDynamicAlloca if MaxAlign > + // TargetAlign. + if (NegSizeReg != ActualNegSizeReg) + BuildMI(MBB, II, dl, CopyInst, ActualNegSizeReg) + .addReg(NegSizeReg) + .addReg(NegSizeReg); MBB.erase(II); } @@ -1084,7 +1093,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (FPSI && FrameIndex == FPSI && (OpC == PPC::PREPARE_PROBED_ALLOCA_64 || - OpC == PPC::PREPARE_PROBED_ALLOCA_32)) { + OpC == PPC::PREPARE_PROBED_ALLOCA_32 || + OpC == PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64 || + OpC == PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32)) { lowerPrepareProbedAlloca(II); return; } diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index f0583f6919361..57e7c41c42711 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -215,6 +215,16 @@ def : ProcessorModel<"rocket-rv32", Rocket32Model, []>; def : ProcessorModel<"rocket-rv64", Rocket64Model, [Feature64Bit]>; +def : ProcessorModel<"sifive-e31", Rocket32Model, [FeatureStdExtM, + FeatureStdExtA, + FeatureStdExtC]>; + +def : ProcessorModel<"sifive-u54", Rocket64Model, [Feature64Bit, + FeatureStdExtM, + FeatureStdExtA, + FeatureStdExtF, + FeatureStdExtD, + FeatureStdExtC]>; //===----------------------------------------------------------------------===// // Define the RISC-V target. 
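The new RISCVTargetParser.def table, the llvm::RISCV declarations added to TargetParser.h, and the sifive-e31/sifive-u54 processor models above give front ends a shared way to validate a -mcpu value and derive its default -march. A hedged sketch of how a driver might consume this API — the surrounding program is illustrative, but the llvm::RISCV calls match the declarations added in this diff:

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Validate a -mcpu value against the target's XLEN and report the
// implied -march (the DEFAULT_MARCH column of RISCVTargetParser.def).
static bool diagnoseCPU(StringRef CPU, bool IsRV64) {
  RISCV::CPUKind Kind = RISCV::parseCPUKind(CPU); // CK_INVALID if unknown
  if (!RISCV::checkCPUKind(Kind, IsRV64)) {
    SmallVector<StringRef, 8> Valid;
    RISCV::fillValidCPUArchList(Valid, IsRV64);
    errs() << "unsupported CPU '" << CPU << "'; valid CPUs:";
    for (StringRef V : Valid)
      errs() << ' ' << V;
    errs() << '\n';
    return false;
  }
  // e.g. sifive-u54 maps to "rv64gc"; generic CPUs have no default march.
  StringRef MArch = RISCV::getMArchFromMcpu(CPU);
  if (!MArch.empty())
    outs() << CPU << " implies -march=" << MArch << '\n';
  return true;
}

int main() {
  diagnoseCPU("sifive-u54", /*IsRV64=*/true); // ok: prints rv64gc
  diagnoseCPU("sifive-e31", /*IsRV64=*/true); // rejected: rv32 core
  return 0;
}
```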
diff --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h index 5cf4516ede974..e32335331879e 100644 --- a/llvm/lib/Target/X86/AsmParser/X86Operand.h +++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h @@ -463,7 +463,14 @@ struct X86Operand final : public MCParsedAsmOperand { bool isGR32orGR64() const { return Kind == Register && (X86MCRegisterClasses[X86::GR32RegClassID].contains(getReg()) || - X86MCRegisterClasses[X86::GR64RegClassID].contains(getReg())); + X86MCRegisterClasses[X86::GR64RegClassID].contains(getReg())); + } + + bool isGR16orGR32orGR64() const { + return Kind == Register && + (X86MCRegisterClasses[X86::GR16RegClassID].contains(getReg()) || + X86MCRegisterClasses[X86::GR32RegClassID].contains(getReg()) || + X86MCRegisterClasses[X86::GR64RegClassID].contains(getReg())); } bool isVectorReg() const { @@ -520,6 +527,15 @@ struct X86Operand final : public MCParsedAsmOperand { Inst.addOperand(MCOperand::createReg(RegNo)); } + void addGR16orGR32orGR64Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + MCRegister RegNo = getReg(); + if (X86MCRegisterClasses[X86::GR32RegClassID].contains(RegNo) || + X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo)) + RegNo = getX86SubSuperRegister(RegNo, 16); + Inst.addOperand(MCOperand::createReg(RegNo)); + } + void addAVX512RCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); addExpr(Inst, getImm()); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 450927aaf5cc7..f8b6b7eb3aff6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6916,25 +6916,16 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, DecodeZeroMoveLowMask(NumElems, Mask); IsUnary = true; break; - case X86ISD::VBROADCAST: { - SDValue N0 = N->getOperand(0); - // See if we're broadcasting from index 0 of an EXTRACT_SUBVECTOR. If so, - // add the pre-extracted value to the Ops vector. - if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && - N0.getOperand(0).getValueType() == VT && - N0.getConstantOperandVal(1) == 0) - Ops.push_back(N0.getOperand(0)); - - // We only decode broadcasts of same-sized vectors, unless the broadcast - // came from an extract from the original width. If we found one, we - // pushed it the Ops vector above. - if (N0.getValueType() == VT || !Ops.empty()) { + case X86ISD::VBROADCAST: + // We only decode broadcasts of same-sized vectors, peeking through to + // extracted subvectors is likely to cause hasOneUse issues with + // SimplifyDemandedBits etc. + if (N->getOperand(0).getValueType() == VT) { DecodeVectorBroadcast(NumElems, Mask); IsUnary = true; break; } return false; - } case X86ISD::VPERMILPV: { assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); IsUnary = true; @@ -44523,6 +44514,8 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG, isHorizontalBinOp(LHS, RHS, DAG, Subtarget, IsFadd)) return DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS); + // NOTE: isHorizontalBinOp may have changed LHS/RHS variables. 
+ return SDValue(); } @@ -47604,6 +47597,30 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1, PMADDBuilder); } +static SDValue combineAddOrSubToHADDorHSUB(SDNode *N, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + EVT VT = N->getValueType(0); + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + bool IsAdd = N->getOpcode() == ISD::ADD; + assert((IsAdd || N->getOpcode() == ISD::SUB) && "Wrong opcode"); + + if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 || + VT == MVT::v8i32) && + Subtarget.hasSSSE3() && + isHorizontalBinOp(Op0, Op1, DAG, Subtarget, IsAdd)) { + auto HOpBuilder = [IsAdd](SelectionDAG &DAG, const SDLoc &DL, + ArrayRef<SDValue> Ops) { + return DAG.getNode(IsAdd ? X86ISD::HADD : X86ISD::HSUB, + DL, Ops[0].getValueType(), Ops); + }; + return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1}, + HOpBuilder); + } + + return SDValue(); +} + static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -47617,17 +47634,8 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, return MAdd; // Try to synthesize horizontal adds from adds of shuffles. - if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 || - VT == MVT::v8i32) && - Subtarget.hasSSSE3() && - isHorizontalBinOp(Op0, Op1, DAG, Subtarget, true)) { - auto HADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL, - ArrayRef<SDValue> Ops) { - return DAG.getNode(X86ISD::HADD, DL, Ops[0].getValueType(), Ops); - }; - return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1}, - HADDBuilder); - } + if (SDValue V = combineAddOrSubToHADDorHSUB(N, DAG, Subtarget)) + return V; // If vectors of i1 are legal, turn (add (zext (vXi1 X)), Y) into // (sub Y, (sext (vXi1 X))). @@ -47800,18 +47808,8 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, } // Try to synthesize horizontal subs from subs of shuffles. 
- EVT VT = N->getValueType(0); - if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 || - VT == MVT::v8i32) && - Subtarget.hasSSSE3() && - isHorizontalBinOp(Op0, Op1, DAG, Subtarget, false)) { - auto HSUBBuilder = [](SelectionDAG &DAG, const SDLoc &DL, - ArrayRef<SDValue> Ops) { - return DAG.getNode(X86ISD::HSUB, DL, Ops[0].getValueType(), Ops); - }; - return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1}, - HSUBBuilder); - } + if (SDValue V = combineAddOrSubToHADDorHSUB(N, DAG, Subtarget)) + return V; // Try to create PSUBUS if SUB's argument is max/min if (SDValue V = combineSubToSubus(N, DAG, Subtarget)) diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 23841c3d7e506..3ea0ae8a88407 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -640,10 +640,17 @@ class ImmSExtAsmOperandClass : AsmOperandClass { def X86GR32orGR64AsmOperand : AsmOperandClass { let Name = "GR32orGR64"; } - def GR32orGR64 : RegisterOperand<GR32> { let ParserMatchClass = X86GR32orGR64AsmOperand; } + +def X86GR16orGR32orGR64AsmOperand : AsmOperandClass { + let Name = "GR16orGR32orGR64"; +} +def GR16orGR32orGR64 : RegisterOperand<GR16> { + let ParserMatchClass = X86GR16orGR32orGR64AsmOperand; +} + def AVX512RCOperand : AsmOperandClass { let Name = "AVX512RC"; } diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td index c23bc7ebbf702..13659b5c456e3 100644 --- a/llvm/lib/Target/X86/X86InstrSystem.td +++ b/llvm/lib/Target/X86/X86InstrSystem.td @@ -207,45 +207,41 @@ let mayLoad = 1 in def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16, NotMemoryFoldable; -def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), +def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16orGR32orGR64:$src), "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16, NotMemoryFoldable; -// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo. let mayLoad = 1 in def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "lar{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32, NotMemoryFoldable; -def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), +def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR16orGR32orGR64:$src), "lar{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32, NotMemoryFoldable; -// i16mem operand in LAR64rm and GR32 operand in LAR64rr is not a typo. let mayLoad = 1 in def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "lar{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable; -def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), +def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR16orGR32orGR64:$src), "lar{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable; -// i16mem operand in LSL32rm and GR32 operand in LSL32rr is not a typo. let mayLoad = 1 in def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16, NotMemoryFoldable; -def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), +def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16orGR32orGR64:$src), "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16, NotMemoryFoldable; -// i16mem operand in LSL64rm and GR32 operand in LSL64rr is not a typo. 
let mayLoad = 1 in def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32, NotMemoryFoldable; -def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), +def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR16orGR32orGR64:$src), "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32, NotMemoryFoldable; let mayLoad = 1 in def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable; -def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), +def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR16orGR32orGR64:$src), "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable; def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index d3c718a919c0a..1304d46fdef4f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1148,11 +1148,12 @@ static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1, assert((IsAnd || Logic.getOpcode() == Instruction::Or) && "Wrong logic op"); // Match an equality compare with a non-poison constant as Cmp0. + // Also, give up if the compare can be constant-folded to avoid looping. ICmpInst::Predicate Pred0; Value *X; Constant *C; if (!match(Cmp0, m_ICmp(Pred0, m_Value(X), m_Constant(C))) || - !isGuaranteedNotToBeUndefOrPoison(C)) + !isGuaranteedNotToBeUndefOrPoison(C) || isa<Constant>(X)) return nullptr; if ((IsAnd && Pred0 != ICmpInst::ICMP_EQ) || (!IsAnd && Pred0 != ICmpInst::ICMP_NE)) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index c734c9a68fb2d..836af6234ad5c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -4220,16 +4220,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; case Intrinsic::assume: { Value *IIOperand = II->getArgOperand(0); - SmallVector<OperandBundleDef, 4> OpBundles; - II->getOperandBundlesAsDefs(OpBundles); - bool HasOpBundles = !OpBundles.empty(); // Remove an assume if it is followed by an identical assume. // TODO: Do we need this? Unless there are conflicting assumptions, the // computeKnownBits(IIOperand) below here eliminates redundant assumes. 
Instruction *Next = II->getNextNonDebugInstruction(); - if (HasOpBundles && - match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))) && - !cast<IntrinsicInst>(Next)->hasOperandBundles()) + if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand)))) return eraseInstFromFunction(CI); // Canonicalize assume(a && b) -> assume(a); assume(b); @@ -4239,15 +4234,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Value *AssumeIntrinsic = II->getCalledOperand(); Value *A, *B; if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) { - Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles, - II->getName()); + Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, II->getName()); Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName()); return eraseInstFromFunction(*II); } // assume(!(a || b)) -> assume(!a); assume(!b); if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) { Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, - Builder.CreateNot(A), OpBundles, II->getName()); + Builder.CreateNot(A), II->getName()); Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, Builder.CreateNot(B), II->getName()); return eraseInstFromFunction(*II); @@ -4263,8 +4257,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { isValidAssumeForContext(II, LHS, &DT)) { MDNode *MD = MDNode::get(II->getContext(), None); LHS->setMetadata(LLVMContext::MD_nonnull, MD); - if (!HasOpBundles) - return eraseInstFromFunction(*II); + return eraseInstFromFunction(*II); // TODO: apply nonnull return attributes to calls and invokes // TODO: apply range metadata for range check patterns? diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index f918dc7198ca9..ca51f37af4d93 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -653,7 +653,7 @@ public: "New instruction already inserted into a basic block!"); BasicBlock *BB = Old.getParent(); BB->getInstList().insert(Old.getIterator(), New); // Insert inst - Worklist.push(New); + Worklist.add(New); return New; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 17124f717af79..db27711f29b17 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2469,6 +2469,10 @@ static Instruction *foldSelectToPhiImpl(SelectInst &Sel, BasicBlock *BB, } else return nullptr; + // Make sure the branches are actually different. + if (TrueSucc == FalseSucc) + return nullptr; + // We want to replace select %cond, %a, %b with a phi that takes value %a // for all incoming edges that are dominated by condition `%cond == true`, // and value %b for edges dominated by condition `%cond == false`. 
If %a diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index bccf94fc217fe..5c008585869cd 100644 --- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Instructions.h" #include "llvm/InitializePasses.h" #define AA_NAME "alignment-from-assumptions" #define DEBUG_TYPE AA_NAME @@ -204,33 +203,103 @@ static Align getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV, } bool AlignmentFromAssumptionsPass::extractAlignmentInfo(CallInst *I, - unsigned Idx, Value *&AAPtr, const SCEV *&AlignSCEV, const SCEV *&OffSCEV) { - Type *Int64Ty = Type::getInt64Ty(I->getContext()); - OperandBundleUse AlignOB = I->getOperandBundleAt(Idx); - if (AlignOB.getTagName() != "align") + // An alignment assume must be a statement about the least-significant + // bits of the pointer being zero, possibly with some offset. + ICmpInst *ICI = dyn_cast<ICmpInst>(I->getArgOperand(0)); + if (!ICI) return false; - assert(AlignOB.Inputs.size() >= 2); - AAPtr = AlignOB.Inputs[0].get(); - // TODO: Consider accumulating the offset to the base. - AAPtr = AAPtr->stripPointerCastsSameRepresentation(); - AlignSCEV = SE->getSCEV(AlignOB.Inputs[1].get()); - AlignSCEV = SE->getTruncateOrZeroExtend(AlignSCEV, Int64Ty); - if (AlignOB.Inputs.size() == 3) - OffSCEV = SE->getSCEV(AlignOB.Inputs[2].get()); - else + + // This must be an expression of the form: x & m == 0. + if (ICI->getPredicate() != ICmpInst::ICMP_EQ) + return false; + + // Swap things around so that the RHS is 0. + Value *CmpLHS = ICI->getOperand(0); + Value *CmpRHS = ICI->getOperand(1); + const SCEV *CmpLHSSCEV = SE->getSCEV(CmpLHS); + const SCEV *CmpRHSSCEV = SE->getSCEV(CmpRHS); + if (CmpLHSSCEV->isZero()) + std::swap(CmpLHS, CmpRHS); + else if (!CmpRHSSCEV->isZero()) + return false; + + BinaryOperator *CmpBO = dyn_cast<BinaryOperator>(CmpLHS); + if (!CmpBO || CmpBO->getOpcode() != Instruction::And) + return false; + + // Swap things around so that the right operand of the and is a constant + // (the mask); we cannot deal with variable masks. + Value *AndLHS = CmpBO->getOperand(0); + Value *AndRHS = CmpBO->getOperand(1); + const SCEV *AndLHSSCEV = SE->getSCEV(AndLHS); + const SCEV *AndRHSSCEV = SE->getSCEV(AndRHS); + if (isa<SCEVConstant>(AndLHSSCEV)) { + std::swap(AndLHS, AndRHS); + std::swap(AndLHSSCEV, AndRHSSCEV); + } + + const SCEVConstant *MaskSCEV = dyn_cast<SCEVConstant>(AndRHSSCEV); + if (!MaskSCEV) + return false; + + // The mask must have some trailing ones (otherwise the condition is + // trivial and tells us nothing about the alignment of the left operand). + unsigned TrailingOnes = MaskSCEV->getAPInt().countTrailingOnes(); + if (!TrailingOnes) + return false; + + // Cap the alignment at the maximum with which LLVM can deal (and make sure + // we don't overflow the shift). + uint64_t Alignment; + TrailingOnes = std::min(TrailingOnes, + unsigned(sizeof(unsigned) * CHAR_BIT - 1)); + Alignment = std::min(1u << TrailingOnes, +Value::MaximumAlignment); + + Type *Int64Ty = Type::getInt64Ty(I->getParent()->getParent()->getContext()); + AlignSCEV = SE->getConstant(Int64Ty, Alignment); + + // The LHS might be a ptrtoint instruction, or it might be the pointer + // with an offset. 
+ AAPtr = nullptr; + OffSCEV = nullptr; + if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(AndLHS)) { + AAPtr = PToI->getPointerOperand(); OffSCEV = SE->getZero(Int64Ty); - OffSCEV = SE->getTruncateOrZeroExtend(OffSCEV, Int64Ty); + } else if (const SCEVAddExpr* AndLHSAddSCEV = + dyn_cast<SCEVAddExpr>(AndLHSSCEV)) { + // Try to find the ptrtoint; subtract it and the rest is the offset. + for (SCEVAddExpr::op_iterator J = AndLHSAddSCEV->op_begin(), + JE = AndLHSAddSCEV->op_end(); J != JE; ++J) + if (const SCEVUnknown *OpUnk = dyn_cast<SCEVUnknown>(*J)) + if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(OpUnk->getValue())) { + AAPtr = PToI->getPointerOperand(); + OffSCEV = SE->getMinusSCEV(AndLHSAddSCEV, *J); + break; + } + } + + if (!AAPtr) + return false; + + // Sign extend the offset to 64 bits (so that it is like all of the other + // expressions). + unsigned OffSCEVBits = OffSCEV->getType()->getPrimitiveSizeInBits(); + if (OffSCEVBits < 64) + OffSCEV = SE->getSignExtendExpr(OffSCEV, Int64Ty); + else if (OffSCEVBits > 64) + return false; + + AAPtr = AAPtr->stripPointerCasts(); return true; } -bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall, - unsigned Idx) { +bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) { Value *AAPtr; const SCEV *AlignSCEV, *OffSCEV; - if (!extractAlignmentInfo(ACall, Idx, AAPtr, AlignSCEV, OffSCEV)) + if (!extractAlignmentInfo(ACall, AAPtr, AlignSCEV, OffSCEV)) return false; // Skip ConstantPointerNull and UndefValue. Assumptions on these shouldn't @@ -248,14 +317,13 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall, continue; if (Instruction *K = dyn_cast<Instruction>(J)) + if (isValidAssumeForContext(ACall, K, DT)) WorkList.push_back(K); } while (!WorkList.empty()) { Instruction *J = WorkList.pop_back_val(); if (LoadInst *LI = dyn_cast<LoadInst>(J)) { - if (!isValidAssumeForContext(ACall, J, DT)) - continue; Align NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, LI->getPointerOperand(), SE); if (NewAlignment > LI->getAlign()) { @@ -263,8 +331,6 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall, ++NumLoadAlignChanged; } } else if (StoreInst *SI = dyn_cast<StoreInst>(J)) { - if (!isValidAssumeForContext(ACall, J, DT)) - continue; Align NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, SI->getPointerOperand(), SE); if (NewAlignment > SI->getAlign()) { @@ -272,8 +338,6 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall, ++NumStoreAlignChanged; } } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(J)) { - if (!isValidAssumeForContext(ACall, J, DT)) - continue; Align NewDestAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, MI->getDest(), SE); @@ -305,7 +369,7 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall, Visited.insert(J); for (User *UJ : J->users()) { Instruction *K = cast<Instruction>(UJ); - if (!Visited.count(K)) + if (!Visited.count(K) && isValidAssumeForContext(ACall, K, DT)) WorkList.push_back(K); } } @@ -332,11 +396,8 @@ bool AlignmentFromAssumptionsPass::runImpl(Function &F, AssumptionCache &AC, bool Changed = false; for (auto &AssumeVH : AC.assumptions()) - if (AssumeVH) { - CallInst *Call = cast<CallInst>(AssumeVH); - for (unsigned Idx = 0; Idx < Call->getNumOperandBundles(); Idx++) - Changed |= processAssumption(Call, Idx); - } + if (AssumeVH) + Changed |= processAssumption(cast<CallInst>(AssumeVH)); return Changed; } diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp 
b/llvm/utils/TableGen/X86RecognizableInstr.cpp index 84f6d5210d745..6a245b5eb4253 100644 --- a/llvm/utils/TableGen/X86RecognizableInstr.cpp +++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp @@ -874,6 +874,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s, TYPE("i16imm", TYPE_IMM) TYPE("i16i8imm", TYPE_IMM) TYPE("GR16", TYPE_R16) + TYPE("GR16orGR32orGR64", TYPE_R16) TYPE("i32mem", TYPE_M) TYPE("i32imm", TYPE_IMM) TYPE("i32i8imm", TYPE_IMM) @@ -1035,6 +1036,7 @@ RecognizableInstr::rmRegisterEncodingFromString(const std::string &s, ENCODING("RST", ENCODING_FP) ENCODING("RSTi", ENCODING_FP) ENCODING("GR16", ENCODING_RM) + ENCODING("GR16orGR32orGR64",ENCODING_RM) ENCODING("GR32", ENCODING_RM) ENCODING("GR32orGR64", ENCODING_RM) ENCODING("GR64", ENCODING_RM) @@ -1072,6 +1074,7 @@ OperandEncoding RecognizableInstr::roRegisterEncodingFromString(const std::string &s, uint8_t OpSize) { ENCODING("GR16", ENCODING_REG) + ENCODING("GR16orGR32orGR64",ENCODING_REG) ENCODING("GR32", ENCODING_REG) ENCODING("GR32orGR64", ENCODING_REG) ENCODING("GR64", ENCODING_REG) |
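The RelocationResolver hunks earlier in this diff teach the resolver the PC-relative forms on AArch64 (R_AARCH64_PREL32/64), PPC64 (R_PPC64_REL32/64), and PPC32 (R_PPC_REL32), which all reduce to the same arithmetic: symbol value plus addend minus the relocation's own offset, truncated to the relocation width. A standalone sketch of that computation under those assumptions — the function name is illustrative, not the resolver's API:

```cpp
#include <cassert>
#include <cstdint>

// PC-relative resolution as in resolveAArch64/resolvePPC64 above:
// the stored value is S + A - P, masked for the 32-bit forms.
static uint64_t resolvePCRel(uint64_t S, int64_t A, uint64_t P,
                             unsigned BitWidth) {
  uint64_t V = S + static_cast<uint64_t>(A) - P;
  return BitWidth == 64 ? V : (V & 0xFFFFFFFFu);
}

int main() {
  // A 32-bit PC-relative reference at offset 0x1000 to a symbol at 0x2000.
  assert(resolvePCRel(/*S=*/0x2000, /*A=*/0, /*P=*/0x1000, 32) == 0x1000);
  return 0;
}
```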