76 files changed, 1803 insertions, 864 deletions
diff --git a/contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp
index 461fd7239905..dd11b0b02bf8 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp
@@ -80,7 +80,7 @@ void DemandedBitsWrapperPass::print(raw_ostream &OS, const Module *M) const {
 
 static bool isAlwaysLive(Instruction *I) {
   return I->isTerminator() || isa<DbgInfoIntrinsic>(I) || I->isEHPad() ||
-         I->mayHaveSideEffects();
+         I->mayHaveSideEffects() || !I->willReturn();
 }
 
 void DemandedBits::determineLiveOperandBits(
diff --git a/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp b/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
index 7f311d8f9a2b..94a24ccf2155 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
@@ -243,11 +243,14 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
   if (RecurrenceType->isFloatingPointTy()) {
     if (!isFloatingPointRecurrenceKind(Kind))
       return false;
-  } else {
+  } else if (RecurrenceType->isIntegerTy()) {
     if (!isIntegerRecurrenceKind(Kind))
       return false;
     if (isArithmeticRecurrenceKind(Kind))
       Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
+  } else {
+    // Pointer min/max may exist, but it is not supported as a reduction op.
+    return false;
   }
 
   Worklist.push_back(Start);
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp
index 52dca7d378e1..4722b68e20e9 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp
@@ -281,7 +281,6 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc,
     // clobbers where they don't really exist at all. Please see D43269 for
     // context.
     switch (II->getIntrinsicID()) {
-    case Intrinsic::lifetime_end:
     case Intrinsic::invariant_start:
     case Intrinsic::invariant_end:
     case Intrinsic::assume:
@@ -358,22 +357,6 @@ struct UpwardsMemoryQuery {
 
 } // end anonymous namespace
 
-static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
-                           BatchAAResults &AA) {
-  Instruction *Inst = MD->getMemoryInst();
-  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
-    switch (II->getIntrinsicID()) {
-    case Intrinsic::lifetime_end: {
-      MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(1));
-      return AA.alias(ArgLoc, Loc) == MustAlias;
-    }
-    default:
-      return false;
-    }
-  }
-  return false;
-}
-
 template <typename AliasAnalysisType>
 static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA,
                                                    const Instruction *I) {
@@ -1465,15 +1448,6 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
       }
 
       MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]);
-      // If the lifetime of the pointer ends at this instruction, it's live on
-      // entry.
-      if (!UseMLOC.IsCall && lifetimeEndsAt(MD, UseMLOC.getLoc(), *AA)) {
-        // Reset UpperBound to liveOnEntryDef's place in the stack
-        UpperBound = 0;
-        FoundClobberResult = true;
-        LocInfo.AR = MustAlias;
-        break;
-      }
       ClobberAlias CA = instructionClobbersQuery(MD, MU, UseMLOC, *AA);
       if (CA.IsClobber) {
         FoundClobberResult = true;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 7d97fc5da9b0..268acb682cf1 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -737,3 +737,84 @@ bool TypeBasedAAWrapperPass::doFinalization(Module &M) {
 void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
 }
+
+MDNode *AAMDNodes::ShiftTBAA(MDNode *MD, size_t Offset) {
+  // Fast path if there's no offset
+  if (Offset == 0)
+    return MD;
+  // Fast path if there's no path tbaa node (and thus scalar)
+  if (!isStructPathTBAA(MD))
+    return MD;
+
+  TBAAStructTagNode Tag(MD);
+  SmallVector<Metadata *, 5> Sub;
+  Sub.push_back(MD->getOperand(0));
+  Sub.push_back(MD->getOperand(1));
+  ConstantInt *InnerOffset = mdconst::extract<ConstantInt>(MD->getOperand(2));
+
+  if (Tag.isNewFormat()) {
+    ConstantInt *InnerSize = mdconst::extract<ConstantInt>(MD->getOperand(3));
+
+    if (InnerOffset->getZExtValue() + InnerSize->getZExtValue() <= Offset) {
+      return nullptr;
+    }
+
+    uint64_t NewSize = InnerSize->getZExtValue();
+    uint64_t NewOffset = InnerOffset->getZExtValue() - Offset;
+    if (InnerOffset->getZExtValue() < Offset) {
+      NewOffset = 0;
+      NewSize -= Offset - InnerOffset->getZExtValue();
+    }
+
+    Sub.push_back(ConstantAsMetadata::get(
+        ConstantInt::get(InnerOffset->getType(), NewOffset)));
+
+    Sub.push_back(ConstantAsMetadata::get(
+        ConstantInt::get(InnerSize->getType(), NewSize)));
+
+    // immutable type
+    if (MD->getNumOperands() >= 5)
+      Sub.push_back(MD->getOperand(4));
+  } else {
+    if (InnerOffset->getZExtValue() < Offset)
+      return nullptr;
+
+    Sub.push_back(ConstantAsMetadata::get(ConstantInt::get(
+        InnerOffset->getType(), InnerOffset->getZExtValue() - Offset)));
+
+    // immutable type
+    if (MD->getNumOperands() >= 4)
+      Sub.push_back(MD->getOperand(3));
+  }
+  return MDNode::get(MD->getContext(), Sub);
+}
+
+MDNode *AAMDNodes::ShiftTBAAStruct(MDNode *MD, size_t Offset) {
+  // Fast path if there's no offset
+  if (Offset == 0)
+    return MD;
+  SmallVector<Metadata *, 3> Sub;
+  for (size_t i = 0, size = MD->getNumOperands(); i < size; i += 3) {
+    ConstantInt *InnerOffset = mdconst::extract<ConstantInt>(MD->getOperand(i));
+    ConstantInt *InnerSize =
+        mdconst::extract<ConstantInt>(MD->getOperand(i + 1));
+    // Don't include any triples that aren't in bounds
+    if (InnerOffset->getZExtValue() + InnerSize->getZExtValue() <= Offset)
+      continue;
+
+    uint64_t NewSize = InnerSize->getZExtValue();
+    uint64_t NewOffset = InnerOffset->getZExtValue() - Offset;
+    if (InnerOffset->getZExtValue() < Offset) {
+      NewOffset = 0;
+      NewSize -= Offset - InnerOffset->getZExtValue();
+    }
+
+    // Shift the offset of the triple
+    Sub.push_back(ConstantAsMetadata::get(
+        ConstantInt::get(InnerOffset->getType(), NewOffset)));
+    Sub.push_back(ConstantAsMetadata::get(
+        ConstantInt::get(InnerSize->getType(), NewSize)));
+    Sub.push_back(MD->getOperand(i + 2));
+  }
+  return MDNode::get(MD->getContext(), Sub);
+}
+\ No newline at end of file
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
index 5600a3b33750..e174c5efe424 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
@@ -5018,36 +5018,14 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
   // arbitrary length of time, but programs aren't allowed to rely on that.
 
   // If there is no successor, then execution can't transfer to it.
-  if (const auto *CRI = dyn_cast<CleanupReturnInst>(I))
-    return !CRI->unwindsToCaller();
-  if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I))
-    return !CatchSwitch->unwindsToCaller();
-  if (isa<ResumeInst>(I))
-    return false;
   if (isa<ReturnInst>(I))
     return false;
   if (isa<UnreachableInst>(I))
     return false;
 
-  // Calls can throw, or contain an infinite loop, or kill the process.
-  if (const auto *CB = dyn_cast<CallBase>(I)) {
-    // Call sites that throw have implicit non-local control flow.
-    if (!CB->doesNotThrow())
-      return false;
-
-    // A function which doens't throw and has "willreturn" attribute will
-    // always return.
-    if (CB->hasFnAttr(Attribute::WillReturn))
-      return true;
-
-    // FIXME: Temporarily assume that all side-effect free intrinsics will
-    // return. Remove this workaround once all intrinsics are appropriately
-    // annotated.
-    return isa<IntrinsicInst>(CB) && CB->onlyReadsMemory();
-  }
-
-  // Other instructions return normally.
-  return true;
+  // An instruction that returns without throwing must transfer control flow
+  // to a successor.
+  return !I->mayThrow() && I->willReturn();
 }
 
 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index df0219fcfa64..a9353bdfb780 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -968,10 +968,11 @@ bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) {
   if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
     return false;
 
-  // Check that the next block is the conditional branch target.
-  if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
-    return false;
-  return true;
+  // Check that the next block is the conditional branch target. Also make sure
+  // that it isn't the same as the G_BR's target (otherwise, this will loop.)
+  MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
+  return BrCondTarget != MI.getOperand(0).getMBB() &&
+         MBB->isLayoutSuccessor(BrCondTarget);
 }
 
 void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index e7f40523efaf..3178ee16af2b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1063,6 +1063,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     Observer.changedInstr(MI);
     return Legalized;
   case TargetOpcode::G_PHI: {
+    // FIXME: add support for when SizeOp0 isn't an exact multiple of
+    // NarrowSize.
+    if (SizeOp0 % NarrowSize != 0)
+      return UnableToLegalize;
+
     unsigned NumParts = SizeOp0 / NarrowSize;
     SmallVector<Register, 2> DstRegs(NumParts);
     SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
index 26439a656917..7fa14fd902ef 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
@@ -156,7 +156,8 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
         // If MI has side effects, it should become a barrier for code motion.
         // IOM is rebuild from the next instruction to prevent later
         // instructions from being moved before this MI.
-        if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) {
+        if (MI.hasUnmodeledSideEffects() && !MI.isPseudoProbe() &&
+            Next != MBB.end()) {
           BuildInstOrderMap(Next, IOM);
           SawStore = false;
         }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
index 59d98054e3a2..b6cfd7dcbfbc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1462,7 +1462,8 @@ bool MachineInstr::hasUnmodeledSideEffects() const {
 }
 
 bool MachineInstr::isLoadFoldBarrier() const {
-  return mayStore() || isCall() || hasUnmodeledSideEffects();
+  return mayStore() || isCall() ||
+         (hasUnmodeledSideEffects() && !isPseudoProbe());
 }
 
 /// allDefsAreDead - Return true if all the defs of this instruction are dead.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 615bea2a4905..6a6f83827f72 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6517,8 +6517,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
 // to consider shift amounts with defined behavior.
+//
+// The IsRotate flag should be set when the LHS of both shifts is the same.
+// Otherwise if matching a general funnel shift, it should be clear.
 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
-                           SelectionDAG &DAG) {
+                           SelectionDAG &DAG, bool IsRotate) {
   // If EltSize is a power of 2 then:
   //
   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
@@ -6550,8 +6553,11 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
   // always invokes undefined behavior for 32-bit X.
   //
   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
+  //
+  // NOTE: We can only do this when matching an AND and not a general
+  // funnel shift.
   unsigned MaskLoBits = 0;
-  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
+  if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
       unsigned Bits = Log2_64(EltSize);
@@ -6641,7 +6647,8 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
   //          (srl x, (*ext y))) ->
   //   (rotr x, y) or (rotl x, (sub 32, y))
   EVT VT = Shifted.getValueType();
-  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
+  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
+                     /*IsRotate*/ true)) {
     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
                        HasPos ? Pos : Neg);
@@ -6670,7 +6677,7 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
   // fold (or (shl x0, (*ext (sub 32, y))),
   //          (srl x1, (*ext y))) ->
   //   (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
-  if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) {
+  if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
                        HasPos ? Pos : Neg);
@@ -21174,7 +21181,7 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
   SDValue N0 = N->getOperand(0);
 
   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
-  if (N0->getOpcode() == ISD::AND) {
+  if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 62f7f3d98ba6..0ff77d4ba1ab 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -261,12 +261,16 @@ bool FastISel::hasTrivialKill(const Value *V) {
     if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0)))
       return false;
 
+  // Casts and extractvalues may be trivially coalesced by fast-isel.
+  if (I->getOpcode() == Instruction::BitCast ||
+      I->getOpcode() == Instruction::PtrToInt ||
+      I->getOpcode() == Instruction::IntToPtr ||
+      I->getOpcode() == Instruction::ExtractValue)
+    return false;
+
   // Only instructions with a single use in the same basic block are considered
   // to have trivial kills.
   return I->hasOneUse() &&
-         !(I->getOpcode() == Instruction::BitCast ||
-           I->getOpcode() == Instruction::PtrToInt ||
-           I->getOpcode() == Instruction::IntToPtr) &&
          cast<Instruction>(*I->user_begin())->getParent() == I->getParent();
 }
 
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 6638ff6a6358..a6bd774934ac 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -9660,8 +9660,9 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,
       // We will look through cast uses, so ignore them completely.
       if (I.isCast())
         continue;
-      // Ignore debug info intrinsics, they don't escape or store to allocas.
-      if (isa<DbgInfoIntrinsic>(I))
+      // Ignore debug info and pseudo op intrinsics, they don't escape or store
+      // to allocas.
+      if (I.isDebugOrPseudoInst())
         continue;
       // This is an unknown instruction. Assume it escapes or writes to all
       // static alloca operands.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5760132e44a0..b0ad86899d25 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2012,7 +2012,7 @@ bool TargetLowering::SimplifyDemandedBits(
 
         const APInt *ShAmtC =
             TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
-        if (!ShAmtC)
+        if (!ShAmtC || ShAmtC->uge(BitWidth))
           break;
         uint64_t ShVal = ShAmtC->getZExtValue();
 
@@ -5935,6 +5935,11 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
 
   SDLoc DL(Op);
 
+  // Because getNegatedExpression can delete nodes we need a handle to keep
+  // temporary nodes alive in case the recursion manages to create an identical
+  // node.
+  std::list<HandleSDNode> Handles;
+
   switch (Opcode) {
   case ISD::ConstantFP: {
     // Don't invert constant FP values after legalization unless the target says
@@ -6003,11 +6008,18 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
     NegatibleCost CostX = NegatibleCost::Expensive;
     SDValue NegX =
         getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
+    // Prevent this node from being deleted by the next call.
+    if (NegX)
+      Handles.emplace_back(NegX);
+
     // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
     NegatibleCost CostY = NegatibleCost::Expensive;
     SDValue NegY =
         getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
 
+    // We're done with the handles.
+    Handles.clear();
+
     // Negate the X if its cost is less or equal than Y.
     if (NegX && (CostX <= CostY)) {
       Cost = CostX;
@@ -6052,11 +6064,18 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
     NegatibleCost CostX = NegatibleCost::Expensive;
     SDValue NegX =
         getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
+    // Prevent this node from being deleted by the next call.
+    if (NegX)
+      Handles.emplace_back(NegX);
+
     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
     NegatibleCost CostY = NegatibleCost::Expensive;
     SDValue NegY =
         getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
 
+    // We're done with the handles.
+    Handles.clear();
+
     // Negate the X if its cost is less or equal than Y.
     if (NegX && (CostX <= CostY)) {
       Cost = CostX;
@@ -6094,15 +6113,25 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
     if (!NegZ)
       break;
 
+    // Prevent this node from being deleted by the next two calls.
+    Handles.emplace_back(NegZ);
+
     // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
     NegatibleCost CostX = NegatibleCost::Expensive;
     SDValue NegX =
         getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
+    // Prevent this node from being deleted by the next call.
+    if (NegX)
+      Handles.emplace_back(NegX);
+
     // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
     NegatibleCost CostY = NegatibleCost::Expensive;
     SDValue NegY =
         getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
 
+    // We're done with the handles.
+    Handles.clear();
+
     // Negate the X if its cost is less or equal than Y.
     if (NegX && (CostX <= CostY)) {
       Cost = std::min(CostX, CostZ);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
index 0411faabbcc3..8d91afb6e99d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
@@ -192,7 +192,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
       // Ignore intrinsics that do not become real instructions.
       // TODO: Narrow this to intrinsics that have store-like effects.
       const auto *CI = cast<CallInst>(I);
-      if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd())
+      if (!CI->isDebugOrPseudoInst() && !CI->isLifetimeStartOrEnd())
         return true;
       break;
     }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index ecee4aed7f88..2a9132bd2fe0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -801,8 +801,8 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill(
   MachineBasicBlock::iterator KillPos = KillMI;
   ++KillPos;
   for (MachineInstr &OtherMI : make_range(End, KillPos)) {
-    // Debug instructions cannot be counted against the limit.
-    if (OtherMI.isDebugInstr())
+    // Debug or pseudo instructions cannot be counted against the limit.
+    if (OtherMI.isDebugOrPseudoInstr())
       continue;
     if (NumVisited > 10)  // FIXME: Arbitrary limit to reduce compile time cost.
       return false;
@@ -974,8 +974,8 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI(
   unsigned NumVisited = 0;
   for (MachineInstr &OtherMI :
        make_range(mi, MachineBasicBlock::iterator(KillMI))) {
-    // Debug instructions cannot be counted against the limit.
-    if (OtherMI.isDebugInstr())
+    // Debug or pseudo instructions cannot be counted against the limit.
+    if (OtherMI.isDebugOrPseudoInstr())
       continue;
     if (NumVisited > 10)  // FIXME: Arbitrary limit to reduce compile time cost.
       return false;
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
index dfdd2c6c669f..834d4cc8f514 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
@@ -393,7 +393,7 @@ void LLVMOrcDisposeJITTargetMachineBuilder(
   delete unwrap(JTMB);
 }
 
-void lLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer) {
+void LLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer) {
   delete unwrap(ObjLayer);
 }
 
diff --git a/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp b/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp
index 23e7af6287b6..7d83cf5dcf1d 100644
--- a/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp
@@ -937,6 +937,12 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
             Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
         return true;
       }
+    } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
+      rename(F);
+      NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                        Intrinsic::ptr_annotation,
+                                        F->arg_begin()->getType());
+      return true;
     }
     break;
 
@@ -947,6 +953,16 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
     }
     break;
 
+  case 'v': {
+    if (Name == "var.annotation" && F->arg_size() == 4) {
+      rename(F);
+      NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                        Intrinsic::var_annotation);
+      return true;
+    }
+    break;
+  }
+
   case 'x':
     if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
       return true;
@@ -3730,6 +3746,32 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
     CI->eraseFromParent();
     return;
 
+  case Intrinsic::ptr_annotation:
+    // Upgrade from versions that lacked the annotation attribute argument.
+    assert(CI->getNumArgOperands() == 4 &&
+           "Before LLVM 12.0 this intrinsic took four arguments");
+    // Create a new call with an added null annotation attribute argument.
+    NewCall = Builder.CreateCall(
+        NewFn,
+        {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
+         CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
+    NewCall->takeName(CI);
+    CI->replaceAllUsesWith(NewCall);
+    CI->eraseFromParent();
+    return;
+
+  case Intrinsic::var_annotation:
+    // Upgrade from versions that lacked the annotation attribute argument.
+    assert(CI->getNumArgOperands() == 4 &&
+           "Before LLVM 12.0 this intrinsic took four arguments");
+    // Create a new call with an added null annotation attribute argument.
+    NewCall = Builder.CreateCall(
+        NewFn,
+        {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
+         CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
+    CI->eraseFromParent();
+    return;
+
   case Intrinsic::x86_xop_vfrcz_ss:
   case Intrinsic::x86_xop_vfrcz_sd:
     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
diff --git a/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp b/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp
index 03cb108cc485..95dd55237e5f 100644
--- a/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/ConstantFold.cpp
@@ -630,7 +630,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
           V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored)) {
         // Undefined behavior invoked - the destination type can't represent
         // the input constant.
-        return PoisonValue::get(DestTy);
+        return UndefValue::get(DestTy);
       }
       return ConstantInt::get(FPC->getContext(), IntVal);
     }
@@ -916,7 +916,7 @@ Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
 
   unsigned NumElts = ValTy->getNumElements();
   if (CIdx->uge(NumElts))
-    return PoisonValue::get(Val->getType());
+    return UndefValue::get(Val->getType());
 
   SmallVector<Constant*, 16> Result;
   Result.reserve(NumElts);
@@ -1151,21 +1151,23 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
     }
     case Instruction::SDiv:
     case Instruction::UDiv:
-      // X / undef -> poison
-      // X / 0 -> poison
-      if (match(C2, m_CombineOr(m_Undef(), m_Zero())))
-        return PoisonValue::get(C2->getType());
+      // X / undef -> undef
+      if (isa<UndefValue>(C2))
+        return C2;
+      // undef / 0 -> undef
       // undef / 1 -> undef
-      if (match(C2, m_One()))
+      if (match(C2, m_Zero()) || match(C2, m_One()))
         return C1;
       // undef / X -> 0       otherwise
       return Constant::getNullValue(C1->getType());
     case Instruction::URem:
     case Instruction::SRem:
-      // X % undef -> poison
-      // X % 0 -> poison
-      if (match(C2, m_CombineOr(m_Undef(), m_Zero())))
-        return PoisonValue::get(C2->getType());
+      // X % undef -> undef
+      if (match(C2, m_Undef()))
+        return C2;
+      // undef % 0 -> undef
+      if (match(C2, m_Zero()))
+        return C1;
       // undef % X -> 0       otherwise
       return Constant::getNullValue(C1->getType());
     case Instruction::Or:                          // X | undef -> -1
@@ -1173,28 +1175,28 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
         return C1;
       return Constant::getAllOnesValue(C1->getType()); // undef | X -> ~0
     case Instruction::LShr:
-      // X >>l undef -> poison
+      // X >>l undef -> undef
       if (isa<UndefValue>(C2))
-        return PoisonValue::get(C2->getType());
+        return C2;
       // undef >>l 0 -> undef
       if (match(C2, m_Zero()))
         return C1;
       // undef >>l X -> 0
       return Constant::getNullValue(C1->getType());
     case Instruction::AShr:
-      // X >>a undef -> poison
+      // X >>a undef -> undef
       if (isa<UndefValue>(C2))
-        return PoisonValue::get(C2->getType());
+        return C2;
       // undef >>a 0 -> undef
       if (match(C2, m_Zero()))
         return C1;
-      // TODO: undef >>a X -> poison if the shift is exact
+      // TODO: undef >>a X -> undef if the shift is exact
       // undef >>a X -> 0
       return Constant::getNullValue(C1->getType());
     case Instruction::Shl:
       // X << undef -> undef
       if (isa<UndefValue>(C2))
-        return PoisonValue::get(C2->getType());
+        return C2;
       // undef << 0 -> undef
       if (match(C2, m_Zero()))
         return C1;
@@ -1247,14 +1249,14 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
       if (CI2->isOne())
         return C1;                                            // X / 1 == X
       if (CI2->isZero())
-        return PoisonValue::get(CI2->getType());              // X / 0 == poison
+        return UndefValue::get(CI2->getType());               // X / 0 == undef
       break;
     case Instruction::URem:
     case Instruction::SRem:
       if (CI2->isOne())
         return Constant::getNullValue(CI2->getType());        // X % 1 == 0
       if (CI2->isZero())
-        return PoisonValue::get(CI2->getType());              // X % 0 == poison
+        return UndefValue::get(CI2->getType());               // X % 0 == undef
       break;
     case Instruction::And:
       if (CI2->isZero()) return C2;                           // X & 0 == 0
@@ -1368,7 +1370,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
       case Instruction::SDiv:
         assert(!CI2->isZero() && "Div by zero handled above");
         if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
-          return PoisonValue::get(CI1->getType());   // MIN_INT / -1 -> poison
+          return UndefValue::get(CI1->getType());   // MIN_INT / -1 -> undef
         return ConstantInt::get(CI1->getContext(), C1V.sdiv(C2V));
       case Instruction::URem:
         assert(!CI2->isZero() && "Div by zero handled above");
@@ -1376,7 +1378,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
       case Instruction::SRem:
         assert(!CI2->isZero() && "Div by zero handled above");
         if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
-          return PoisonValue::get(CI1->getType());   // MIN_INT % -1 -> poison
+          return UndefValue::get(CI1->getType());   // MIN_INT % -1 -> undef
         return ConstantInt::get(CI1->getContext(), C1V.srem(C2V));
       case Instruction::And:
         return ConstantInt::get(CI1->getContext(), C1V & C2V);
@@ -1387,15 +1389,15 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
       case Instruction::Shl:
         if (C2V.ult(C1V.getBitWidth()))
           return ConstantInt::get(CI1->getContext(), C1V.shl(C2V));
-        return PoisonValue::get(C1->getType()); // too big shift is poison
+        return UndefValue::get(C1->getType()); // too big shift is undef
       case Instruction::LShr:
         if (C2V.ult(C1V.getBitWidth()))
           return ConstantInt::get(CI1->getContext(), C1V.lshr(C2V));
-        return PoisonValue::get(C1->getType()); // too big shift is poison
+        return UndefValue::get(C1->getType()); // too big shift is undef
       case Instruction::AShr:
         if (C2V.ult(C1V.getBitWidth()))
           return ConstantInt::get(CI1->getContext(), C1V.ashr(C2V));
-        return PoisonValue::get(C1->getType()); // too big shift is poison
+        return UndefValue::get(C1->getType()); // too big shift is undef
       }
     }
 
@@ -1441,7 +1443,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
     // Fast path for splatted constants.
     if (Constant *C2Splat = C2->getSplatValue()) {
       if (Instruction::isIntDivRem(Opcode) && C2Splat->isNullValue())
-        return PoisonValue::get(VTy);
+        return UndefValue::get(VTy);
       if (Constant *C1Splat = C1->getSplatValue()) {
         return ConstantVector::getSplat(
             VTy->getElementCount(),
@@ -1458,9 +1460,9 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
         Constant *LHS = ConstantExpr::getExtractElement(C1, ExtractIdx);
         Constant *RHS = ConstantExpr::getExtractElement(C2, ExtractIdx);
 
-        // If any element of a divisor vector is zero, the whole op is poison.
+        // If any element of a divisor vector is zero, the whole op is undef.
         if (Instruction::isIntDivRem(Opcode) && RHS->isNullValue())
-          return PoisonValue::get(VTy);
+          return UndefValue::get(VTy);
 
         Result.push_back(ConstantExpr::get(Opcode, LHS, RHS));
       }
@@ -2343,8 +2345,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
     return PoisonValue::get(GEPTy);
 
   if (isa<UndefValue>(C))
-    // If inbounds, we can choose an out-of-bounds pointer as a base pointer.
-    return InBounds ? PoisonValue::get(GEPTy) : UndefValue::get(GEPTy);
+    return UndefValue::get(GEPTy);
 
   Constant *Idx0 = cast<Constant>(Idxs[0]);
   if (Idxs.size() == 1 && (Idx0->isNullValue() || isa<UndefValue>(Idx0)))
diff --git a/contrib/llvm-project/llvm/lib/IR/Instruction.cpp b/contrib/llvm-project/llvm/lib/IR/Instruction.cpp
index 1e3fcd672a43..8e52dd3ddc71 100644
--- a/contrib/llvm-project/llvm/lib/IR/Instruction.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Instruction.cpp
@@ -633,6 +633,16 @@ bool Instruction::isSafeToRemove() const {
          !this->isTerminator();
 }
 
+bool Instruction::willReturn() const {
+  if (const auto *CB = dyn_cast<CallBase>(this))
+    // FIXME: Temporarily assume that all side-effect free intrinsics will
+    // return. Remove this workaround once all intrinsics are appropriately
+    // annotated.
+    return CB->hasFnAttr(Attribute::WillReturn) ||
+           (isa<IntrinsicInst>(CB) && CB->onlyReadsMemory());
+  return true;
+}
+
 bool Instruction::isLifetimeStartOrEnd() const {
   auto II = dyn_cast<IntrinsicInst>(this);
   if (!II)
@@ -641,6 +651,10 @@ bool Instruction::isLifetimeStartOrEnd() const {
   return ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end;
 }
 
+bool Instruction::isDebugOrPseudoInst() const {
+  return isa<DbgInfoIntrinsic>(this) || isa<PseudoProbeInst>(this);
+}
+
 const Instruction *
 Instruction::getNextNonDebugInstruction(bool SkipPseudoOp) const {
   for (const Instruction *I = getNextNode(); I; I = I->getNextNode())
diff --git a/contrib/llvm-project/llvm/lib/IR/Operator.cpp b/contrib/llvm-project/llvm/lib/IR/Operator.cpp
index 0f70fc37dee2..69181f35827b 100644
--- a/contrib/llvm-project/llvm/lib/IR/Operator.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Operator.cpp
@@ -61,10 +61,17 @@ Align GEPOperator::getMaxPreservedAlignment(const DataLayout &DL) const {
 bool GEPOperator::accumulateConstantOffset(
     const DataLayout &DL, APInt &Offset,
     function_ref<bool(Value &, APInt &)> ExternalAnalysis) const {
-   assert(Offset.getBitWidth() ==
-              DL.getIndexSizeInBits(getPointerAddressSpace()) &&
-          "The offset bit width does not match DL specification.");
+  assert(Offset.getBitWidth() ==
+             DL.getIndexSizeInBits(getPointerAddressSpace()) &&
+         "The offset bit width does not match DL specification.");
+  SmallVector<const Value *> Index(value_op_begin() + 1, value_op_end());
+  return GEPOperator::accumulateConstantOffset(getSourceElementType(), Index,
+                                               DL, Offset, ExternalAnalysis);
+}
 
+bool GEPOperator::accumulateConstantOffset(
+    Type *SourceType, ArrayRef<const Value *> Index, const DataLayout &DL,
+    APInt &Offset, function_ref<bool(Value &, APInt &)> ExternalAnalysis) {
   bool UsedExternalAnalysis = false;
   auto AccumulateOffset = [&](APInt Index, uint64_t Size) -> bool {
     Index = Index.sextOrTrunc(Offset.getBitWidth());
@@ -85,9 +92,10 @@ bool GEPOperator::accumulateConstantOffset(
     }
     return true;
   };
-
-  for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this);
-       GTI != GTE; ++GTI) {
+  auto begin = generic_gep_type_iterator<decltype(Index.begin())>::begin(
+      SourceType, Index.begin());
+  auto end = generic_gep_type_iterator<decltype(Index.end())>::end(Index.end());
+  for (auto GTI = begin, GTE = end; GTI != GTE; ++GTI) {
     // Scalable vectors are multiplied by a runtime constant.
     bool ScalableType = false;
     if (isa<ScalableVectorType>(GTI.getIndexedType()))
diff --git a/contrib/llvm-project/llvm/lib/IR/PseudoProbe.cpp b/contrib/llvm-project/llvm/lib/IR/PseudoProbe.cpp
index 804214f06e7a..80d2963938d4 100644
--- a/contrib/llvm-project/llvm/lib/IR/PseudoProbe.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/PseudoProbe.cpp
@@ -35,6 +35,9 @@ Optional<PseudoProbe> extractProbeFromDiscriminator(const Instruction &Inst) {
           PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator);
       Probe.Attr =
           PseudoProbeDwarfDiscriminator::extractProbeAttributes(Discriminator);
+      Probe.Factor =
+          PseudoProbeDwarfDiscriminator::extractProbeFactor(Discriminator) /
+          (float)PseudoProbeDwarfDiscriminator::FullDistributionFactor;
       return Probe;
     }
   }
@@ -47,6 +50,8 @@ Optional<PseudoProbe> extractProbe(const Instruction &Inst) {
     Probe.Id = II->getIndex()->getZExtValue();
     Probe.Type = (uint32_t)PseudoProbeType::Block;
     Probe.Attr = II->getAttributes()->getZExtValue();
+    Probe.Factor = II->getFactor()->getZExtValue() /
+                   (float)PseudoProbeFullDistributionFactor;
     return Probe;
   }
 
@@ -55,4 +60,40 @@ Optional<PseudoProbe> extractProbe(const Instruction &Inst) {
 
   return None;
 }
+
+void setProbeDistributionFactor(Instruction &Inst, float Factor) {
+  assert(Factor >= 0 && Factor <= 1 &&
+         "Distribution factor must be in [0, 1.0]");
+  if (auto *II = dyn_cast<PseudoProbeInst>(&Inst)) {
+    IRBuilder<> Builder(&Inst);
+    uint64_t IntFactor = PseudoProbeFullDistributionFactor;
+    if (Factor < 1)
+      IntFactor *= Factor;
+    auto OrigFactor = II->getFactor()->getZExtValue();
+    if (IntFactor != OrigFactor)
+      II->replaceUsesOfWith(II->getFactor(), Builder.getInt64(IntFactor));
+  } else if (isa<CallBase>(&Inst) && !isa<IntrinsicInst>(&Inst)) {
+    if (const DebugLoc &DLoc = Inst.getDebugLoc()) {
+      const DILocation *DIL = DLoc;
+      auto Discriminator = DIL->getDiscriminator();
+      if (DILocation::isPseudoProbeDiscriminator(Discriminator)) {
+        auto Index =
+            PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator);
+        auto Type =
+            PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator);
+        auto Attr = PseudoProbeDwarfDiscriminator::extractProbeAttributes(
+            Discriminator);
+        // Round small factors to 0 to avoid over-counting.
+        uint32_t IntFactor =
+            PseudoProbeDwarfDiscriminator::FullDistributionFactor;
+        if (Factor < 1)
+          IntFactor *= Factor;
+        uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
+            Index, Type, Attr, IntFactor);
+        DIL = DIL->cloneWithDiscriminator(V);
+        Inst.setDebugLoc(DIL);
+      }
+    }
+  }
+}
 } // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/IR/Verifier.cpp b/contrib/llvm-project/llvm/lib/IR/Verifier.cpp
index 100e881c8fa8..6dd299ee9845 100644
--- a/contrib/llvm-project/llvm/lib/IR/Verifier.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Verifier.cpp
@@ -1070,12 +1070,6 @@ void Verifier::visitDICompositeType(const DICompositeType &N) {
   if (auto *Params = N.getRawTemplateParams())
     visitTemplateParams(N, *Params);
 
-  if (N.getTag() == dwarf::DW_TAG_class_type ||
-      N.getTag() == dwarf::DW_TAG_union_type) {
-    AssertDI(N.getFile() && !N.getFile()->getFilename().empty(),
-             "class/union requires a filename", &N, N.getFile());
-  }
-
   if (auto *D = N.getRawDiscriminator()) {
     AssertDI(isa<DIDerivedType>(D) && N.getTag() == dwarf::DW_TAG_variant_part,
              "discriminator can only appear on variant part");
diff --git a/contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp
index 69307b617552..2d810ffd350b 100644
--- a/contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp
@@ -1397,6 +1397,17 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
       if (TargetObjectWriter->getEMachine() == ELF::EM_386 &&
           Type == ELF::R_386_GOTOFF)
         return true;
+
+      // ld.lld handles R_MIPS_HI16/R_MIPS_LO16 separately, not as a whole, so
+      // it doesn't know that an R_MIPS_HI16 with implicit addend 1 and an
+      // R_MIPS_LO16 with implicit addend -32768 represents 32768, which is in
+      // range of a MergeInputSection. We could introduce a new RelExpr member
+      // (like R_RISCV_PC_INDIRECT for R_RISCV_PCREL_HI20 / R_RISCV_PCREL_LO12)
+      // but the complexity is unnecessary given that GNU as keeps the original
+      // symbol for this case as well.
+      if (TargetObjectWriter->getEMachine() == ELF::EM_MIPS &&
+          !hasRelocationAddend())
+        return true;
     }
 
     // Most TLS relocations use a got, so they need the symbol. Even those that
diff --git a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
index d4c4c6e01ef5..6c1a7c75d30a 100644
--- a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
@@ -1423,6 +1423,9 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
   // Now add the optimization pipeline.
   MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
 
+  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
+    MPM.addPass(PseudoProbeUpdatePass());
+
   // Emit annotation remarks.
   addAnnotationRemarksPass(MPM);
 
@@ -1477,6 +1480,9 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
   if (PTO.Coroutines)
     MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
 
+  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
+    MPM.addPass(PseudoProbeUpdatePass());
+
   // Emit annotation remarks.
   addAnnotationRemarksPass(MPM);
 
diff --git a/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def b/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def
index 860bfade733d..877cb9ed13b3 100644
--- a/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def
+++ b/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def
@@ -119,6 +119,7 @@ MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, f
 MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass())
 MODULE_PASS("memprof-module", ModuleMemProfilerPass())
 MODULE_PASS("poison-checking", PoisonCheckingPass())
+MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass())
 #undef MODULE_PASS
 
 #ifndef CGSCC_ANALYSIS
diff --git a/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp b/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp
index a8bfe02d4432..6795aed7b04e 100644
--- a/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -882,6 +882,7 @@ void StandardInstrumentations::registerCallbacks(
   OptBisect.registerCallbacks(PIC);
   PreservedCFGChecker.registerCallbacks(PIC);
   PrintChangedIR.registerCallbacks(PIC);
+  PseudoProbeVerification.registerCallbacks(PIC);
   if (VerifyEach)
     Verify.registerCallbacks(PIC);
 }
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
index a8cc308b4e3a..cdbcde50d33a 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -794,7 +794,6 @@ LineCoverageStats::LineCoverageStats(
     ExecutionCount = WrappedSegment->Count;
   if (!MinRegionCount)
     return;
-  ExecutionCount = 0;
   for (const auto *LS : LineSegments)
     if (isStartOfRegion(LS))
       ExecutionCount = std::max(ExecutionCount, LS->Count);
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/contrib/llvm-project/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
index d2603097c550..0e03aa50173d 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -18,9 +18,14 @@
 #include "llvm/ProfileData/ProfileCommon.h"
 #include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
 
 using namespace llvm;
 
+cl::opt<bool> UseContextLessSummary(
+    "profile-summary-contextless", cl::Hidden, cl::init(false), cl::ZeroOrMore,
+    cl::desc("Merge context profiles before calculating thresholds."));
+
 // A set of cutoff values. Each value, when divided by ProfileSummary::Scale
 // (which is 1000000) is a desired percentile of total counts.
 static const uint32_t DefaultCutoffsData[] = {
@@ -111,6 +116,35 @@ std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() {
       MaxFunctionCount, NumCounts, NumFunctions);
 }
 
+std::unique_ptr<ProfileSummary>
+SampleProfileSummaryBuilder::computeSummaryForProfiles(
+    const StringMap<sampleprof::FunctionSamples> &Profiles) {
+  assert(NumFunctions == 0 &&
+         "This can only be called on an empty summary builder");
+  StringMap<sampleprof::FunctionSamples> ContextLessProfiles;
+  const StringMap<sampleprof::FunctionSamples> *ProfilesToUse = &Profiles;
+  // For CSSPGO, context-sensitive profile effectively split a function profile
+  // into many copies each representing the CFG profile of a particular calling
+  // context. That makes the count distribution looks more flat as we now have
+  // more function profiles each with lower counts, which in turn leads to lower
+  // hot thresholds. To compensate for that, by defauly we merge context
+  // profiles before coumputing profile summary.
+  if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
+                                !UseContextLessSummary.getNumOccurrences())) {
+    for (const auto &I : Profiles) {
+      ContextLessProfiles[I.second.getName()].merge(I.second);
+    }
+    ProfilesToUse = &ContextLessProfiles;
+  }
+
+  for (const auto &I : *ProfilesToUse) {
+    const sampleprof::FunctionSamples &Profile = I.second;
+    addRecord(Profile);
+  }
+
+  return getSummary();
+}
+
 std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() {
   computeDetailedSummary();
   return std::make_unique<ProfileSummary>(
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp
index c42931174bc0..38cbca844c87 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -222,8 +222,6 @@ std::error_code SampleProfileReaderText::readImpl() {
   sampleprof_error Result = sampleprof_error::success;
 
   InlineCallStack InlineStack;
-  int CSProfileCount = 0;
-  int RegularProfileCount = 0;
   uint32_t ProbeProfileCount = 0;
 
   // SeenMetadata tracks whether we have processed metadata for the current
@@ -257,11 +255,9 @@ std::error_code SampleProfileReaderText::readImpl() {
       SampleContext FContext(FName);
       if (FContext.hasContext())
         ++CSProfileCount;
-      else
-        ++RegularProfileCount;
       Profiles[FContext] = FunctionSamples();
       FunctionSamples &FProfile = Profiles[FContext];
-      FProfile.setName(FContext.getName());
+      FProfile.setName(FContext.getNameWithoutContext());
       FProfile.setContext(FContext);
       MergeResult(Result, FProfile.addTotalSamples(NumSamples));
       MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
@@ -324,13 +320,14 @@ std::error_code SampleProfileReaderText::readImpl() {
     }
   }
 
-  assert((RegularProfileCount == 0 || CSProfileCount == 0) &&
+  assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
          "Cannot have both context-sensitive and regular profile");
   ProfileIsCS = (CSProfileCount > 0);
   assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) &&
          "Cannot have both probe-based profiles and regular profiles");
   ProfileIsProbeBased = (ProbeProfileCount > 0);
   FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
+  FunctionSamples::ProfileIsCS = ProfileIsCS;
 
   if (Result == sampleprof_error::success)
     computeSummary();
@@ -546,12 +543,16 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
   if (std::error_code EC = FName.getError())
     return EC;
 
-  Profiles[*FName] = FunctionSamples();
-  FunctionSamples &FProfile = Profiles[*FName];
-  FProfile.setName(*FName);
-
+  SampleContext FContext(*FName);
+  Profiles[FContext] = FunctionSamples();
+  FunctionSamples &FProfile = Profiles[FContext];
+  FProfile.setName(FContext.getNameWithoutContext());
+  FProfile.setContext(FContext);
   FProfile.addHeadSamples(*NumHeadSamples);
 
+  if (FContext.hasContext())
+    CSProfileCount++;
+
   if (std::error_code EC = readProfile(FProfile))
     return EC;
   return sampleprof_error::success;
@@ -654,40 +655,44 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
         return EC;
     }
     assert(Data == End && "More data is read than expected");
-    return sampleprof_error::success;
-  }
-
-  if (Remapper) {
-    for (auto Name : FuncsToUse) {
-      Remapper->insert(Name);
+  } else {
+    if (Remapper) {
+      for (auto Name : FuncsToUse) {
+        Remapper->insert(Name);
+      }
     }
-  }
 
-  if (useMD5()) {
-    for (auto Name : FuncsToUse) {
-      auto GUID = std::to_string(MD5Hash(Name));
-      auto iter = FuncOffsetTable.find(StringRef(GUID));
-      if (iter == FuncOffsetTable.end())
-        continue;
-      const uint8_t *FuncProfileAddr = Start + iter->second;
-      assert(FuncProfileAddr < End && "out of LBRProfile section");
-      if (std::error_code EC = readFuncProfile(FuncProfileAddr))
-        return EC;
-    }
-  } else {
-    for (auto NameOffset : FuncOffsetTable) {
-      auto FuncName = NameOffset.first;
-      if (!FuncsToUse.count(FuncName) &&
-          (!Remapper || !Remapper->exist(FuncName)))
-        continue;
-      const uint8_t *FuncProfileAddr = Start + NameOffset.second;
-      assert(FuncProfileAddr < End && "out of LBRProfile section");
-      if (std::error_code EC = readFuncProfile(FuncProfileAddr))
-        return EC;
+    if (useMD5()) {
+      for (auto Name : FuncsToUse) {
+        auto GUID = std::to_string(MD5Hash(Name));
+        auto iter = FuncOffsetTable.find(StringRef(GUID));
+        if (iter == FuncOffsetTable.end())
+          continue;
+        const uint8_t *FuncProfileAddr = Start + iter->second;
+        assert(FuncProfileAddr < End && "out of LBRProfile section");
+        if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+          return EC;
+      }
+    } else {
+      for (auto NameOffset : FuncOffsetTable) {
+        SampleContext FContext(NameOffset.first);
+        auto FuncName = FContext.getNameWithoutContext();
+        if (!FuncsToUse.count(FuncName) &&
+            (!Remapper || !Remapper->exist(FuncName)))
+          continue;
+        const uint8_t *FuncProfileAddr = Start + NameOffset.second;
+        assert(FuncProfileAddr < End && "out of LBRProfile section");
+        if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+          return EC;
+      }
     }
+    Data = End;
   }
 
-  Data = End;
+  assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
+         "Cannot have both context-sensitive and regular profile");
+  ProfileIsCS = (CSProfileCount > 0);
+  FunctionSamples::ProfileIsCS = ProfileIsCS;
   return sampleprof_error::success;
 }
 
@@ -878,7 +883,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
 std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() {
   if (!ProfileIsProbeBased)
     return sampleprof_error::success;
-  for (unsigned I = 0; I < Profiles.size(); ++I) {
+  while (Data < End) {
     auto FName(readStringFromTable());
     if (std::error_code EC = FName.getError())
       return EC;
@@ -887,8 +892,14 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() {
     if (std::error_code EC = Checksum.getError())
       return EC;
 
-    Profiles[*FName].setFunctionHash(*Checksum);
+    SampleContext FContext(*FName);
+    // No need to load metadata for profiles that are not loaded in the current
+    // module.
+    if (Profiles.count(FContext))
+      Profiles[FContext].setFunctionHash(*Checksum);
   }
+
+  assert(Data == End && "More data is read than expected");
   return sampleprof_error::success;
 }
 
@@ -1599,9 +1610,5 @@ SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
 // profile. Binary format has the profile summary in its header.
 void SampleProfileReader::computeSummary() {
   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
-  for (const auto &I : Profiles) {
-    const FunctionSamples &Profile = I.second;
-    Builder.addRecord(Profile);
-  }
-  Summary = Builder.getSummary();
+  Summary = Builder.computeSummaryForProfiles(Profiles);
 }
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/SampleProfWriter.cpp b/contrib/llvm-project/llvm/lib/ProfileData/SampleProfWriter.cpp
index 71dba6281f76..8017f2a82804 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -147,7 +147,7 @@ std::error_code SampleProfileWriterExtBinaryBase::write(
 std::error_code
 SampleProfileWriterExtBinaryBase::writeSample(const FunctionSamples &S) {
   uint64_t Offset = OutputStream->tell();
-  StringRef Name = S.getName();
+  StringRef Name = S.getNameWithContext(true);
   FuncOffsetTable[Name] = Offset - SecLBRProfileStart;
   encodeULEB128(S.getHeadSamples(), *OutputStream);
   return writeBody(S);
@@ -360,10 +360,7 @@ std::error_code SampleProfileWriterCompactBinary::write(
 /// it needs to be parsed by the SampleProfileReaderText class.
 std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
   auto &OS = *OutputStream;
-  if (FunctionSamples::ProfileIsCS)
-    OS << "[" << S.getNameWithContext() << "]:" << S.getTotalSamples();
-  else
-    OS << S.getName() << ":" << S.getTotalSamples();
+  OS << S.getNameWithContext(true) << ":" << S.getTotalSamples();
   if (Indent == 0)
     OS << ":" << S.getHeadSamples();
   OS << "\n";
@@ -635,7 +632,7 @@ std::error_code SampleProfileWriterBinary::writeSummary() {
 std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) {
   auto &OS = *OutputStream;
 
-  if (std::error_code EC = writeNameIdx(S.getName()))
+  if (std::error_code EC = writeNameIdx(S.getNameWithContext(true)))
     return EC;
 
   encodeULEB128(S.getTotalSamples(), OS);
@@ -752,9 +749,5 @@ SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
 void SampleProfileWriter::computeSummary(
     const StringMap<FunctionSamples> &ProfileMap) {
   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
-  for (const auto &I : ProfileMap) {
-    const FunctionSamples &Profile = I.second;
-    Builder.addRecord(Profile);
-  }
-  Summary = Builder.getSummary();
+  Summary = Builder.computeSummaryForProfiles(ProfileMap);
 }
diff --git a/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp b/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp
index 6d89481bf28a..e2f014d1815b 100644
--- a/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp
@@ -1726,6 +1726,19 @@ void Option::printHelpStr(StringRef HelpStr, size_t Indent,
   }
 }
 
+void Option::printEnumValHelpStr(StringRef HelpStr, size_t BaseIndent,
+                                 size_t FirstLineIndentedBy) {
+  const StringRef ValHelpPrefix = "  ";
+  assert(BaseIndent >= FirstLineIndentedBy + ValHelpPrefix.size());
+  std::pair<StringRef, StringRef> Split = HelpStr.split('\n');
+  outs().indent(BaseIndent - FirstLineIndentedBy)
+      << ArgHelpPrefix << ValHelpPrefix << Split.first << "\n";
+  while (!Split.second.empty()) {
+    Split = Split.second.split('\n');
+    outs().indent(BaseIndent + ValHelpPrefix.size()) << Split.first << "\n";
+  }
+}
+
 // Print out the option for the alias.
 void alias::printOptionInfo(size_t GlobalWidth) const {
   outs() << PrintArg(ArgStr);
@@ -1971,17 +1984,17 @@ void generic_parser_base::printOptionInfo(const Option &O,
       StringRef Description = getDescription(i);
       if (!shouldPrintOption(OptionName, Description, O))
         continue;
-      assert(GlobalWidth >= OptionName.size() + OptionPrefixesSize);
-      size_t NumSpaces = GlobalWidth - OptionName.size() - OptionPrefixesSize;
+      size_t FirstLineIndent = OptionName.size() + OptionPrefixesSize;
       outs() << OptionPrefix << OptionName;
       if (OptionName.empty()) {
         outs() << EmptyOption;
-        assert(NumSpaces >= EmptyOption.size());
-        NumSpaces -= EmptyOption.size();
+        assert(FirstLineIndent >= EmptyOption.size());
+        FirstLineIndent += EmptyOption.size();
       }
       if (!Description.empty())
-        outs().indent(NumSpaces) << ArgHelpPrefix << "  " << Description;
-      outs() << '\n';
+        Option::printEnumValHelpStr(Description, GlobalWidth, FirstLineIndent);
+      else
+        outs() << '\n';
     }
   } else {
     if (!O.HelpStr.empty())
diff --git a/contrib/llvm-project/llvm/lib/Support/Windows/Path.inc b/contrib/llvm-project/llvm/lib/Support/Windows/Path.inc
index dc9bcf868381..adcbd1b5f8f3 100644
--- a/contrib/llvm-project/llvm/lib/Support/Windows/Path.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Windows/Path.inc
@@ -402,8 +402,22 @@ std::error_code is_local(int FD, bool &Result) {
 }
 
 static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) {
-  // First, check if the file is on a network (non-local) drive. If so, don't
-  // set DeleteFile to true, since it prevents opening the file for writes.
+  // Clear the FILE_DISPOSITION_INFO flag first, before checking if it's a
+  // network file. On Windows 7 the function realPathFromHandle() below fails
+  // if the FILE_DISPOSITION_INFO flag was already set to 'DeleteFile = true' by
+  // a prior call.
+  FILE_DISPOSITION_INFO Disposition;
+  Disposition.DeleteFile = false;
+  if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition,
+                                  sizeof(Disposition)))
+    return mapWindowsError(::GetLastError());
+  if (!Delete)
+    return std::error_code();
+
+  // Check if the file is on a network (non-local) drive. If so, don't
+  // continue when DeleteFile is true, since it prevents opening the file for
+  // writes. Note -- this will leak temporary files on disk, but only when the
+  // target file is on a network drive.
   SmallVector<wchar_t, 128> FinalPath;
   if (std::error_code EC = realPathFromHandle(Handle, FinalPath))
     return EC;
@@ -415,9 +429,9 @@ static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) {
   if (!IsLocal)
     return std::error_code();
 
-  // The file is on a local drive, set the DeleteFile to true.
-  FILE_DISPOSITION_INFO Disposition;
-  Disposition.DeleteFile = Delete;
+  // The file is on a local drive, we can safely set FILE_DISPOSITION_INFO's
+  // flag.
+  Disposition.DeleteFile = true;
   if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition,
                                   sizeof(Disposition)))
     return mapWindowsError(::GetLastError());
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1be09186dc0a..1451151f4dc5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1017,11 +1017,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     // Vector reductions
     for (MVT VT : { MVT::v4f16, MVT::v2f32,
                     MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
-      setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
-      setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
+      if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
+        setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
+        setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
 
-      if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16())
         setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
+      }
     }
     for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
                     MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 37c924d879b1..68c721cb0d72 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -111,7 +111,7 @@ AArch64MCAsmInfoMicrosoftCOFF::AArch64MCAsmInfoMicrosoftCOFF() {
   SupportsDebugInformation = true;
   CodePointerSize = 8;
 
-  CommentString = ";";
+  CommentString = "//";
   ExceptionsType = ExceptionHandling::WinEH;
   WinEHEncodingType = WinEH::EncodingType::Itanium;
 }
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 2628070f219c..cdb78aae1c4f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -75,17 +75,19 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
         MachineOperand &MovSrc = Def->getOperand(1);
         bool ConstantFolded = false;
 
-        if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
-                               isUInt<32>(MovSrc.getImm()))) {
-          Src0.ChangeToImmediate(MovSrc.getImm());
-          ConstantFolded = true;
-        } else if (MovSrc.isFI()) {
-          Src0.ChangeToFrameIndex(MovSrc.getIndex());
-          ConstantFolded = true;
-        } else if (MovSrc.isGlobal()) {
-          Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
-                          MovSrc.getTargetFlags());
-          ConstantFolded = true;
+        if (TII->isOperandLegal(MI, Src0Idx, &MovSrc)) {
+          if (MovSrc.isImm() &&
+              (isInt<32>(MovSrc.getImm()) || isUInt<32>(MovSrc.getImm()))) {
+            Src0.ChangeToImmediate(MovSrc.getImm());
+            ConstantFolded = true;
+          } else if (MovSrc.isFI()) {
+            Src0.ChangeToFrameIndex(MovSrc.getIndex());
+            ConstantFolded = true;
+          } else if (MovSrc.isGlobal()) {
+            Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
+                            MovSrc.getTargetFlags());
+            ConstantFolded = true;
+          }
         }
 
         if (ConstantFolded) {
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 397979b4ab1e..598062672a56 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -18661,6 +18661,8 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
              : AtomicExpansionKind::None;
 }
 
+// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used  up to 32
+// bits, and up to 64 bits on the non-M profiles.
 TargetLowering::AtomicExpansionKind
 ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
   // At -O0, fast-regalloc cannot cope with the live vregs necessary to
@@ -18668,9 +18670,11 @@ ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
   // on the stack and close enough to the spill slot, this can lead to a
   // situation where the monitor always gets cleared and the atomic operation
   // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
+  unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
   bool HasAtomicCmpXchg =
       !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
-  if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg)
+  if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg &&
+      Size <= (Subtarget->isMClass() ? 32U : 64U))
     return AtomicExpansionKind::LLSC;
   return AtomicExpansionKind::None;
 }
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 693b0adaede4..2604218da160 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -5896,7 +5896,13 @@ bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
         User->getMachineOpcode() != PPC::SELECT_I8)
       return false;
 
+    SDNode *Op1 = User->getOperand(1).getNode();
     SDNode *Op2 = User->getOperand(2).getNode();
+    // If we have a degenerate select with two equal operands, swapping will
+    // not do anything, and we may run into an infinite loop.
+    if (Op1 == Op2)
+      return false;
+
     if (!Op2->isMachineOpcode())
       return false;
 
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 9215c17cb94b..929a72ac687e 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -8604,16 +8604,20 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
 
   // If it is a splat of a double, check if we can shrink it to a 32 bit
   // non-denormal float which when converted back to double gives us the same
-  // double. This is to exploit the XXSPLTIDP instruction.+  // If we lose precision, we use XXSPLTI32DX.
+  // double. This is to exploit the XXSPLTIDP instruction.
+  // If we lose precision, we use XXSPLTI32DX.
   if (BVNIsConstantSplat && (SplatBitSize == 64) &&
       Subtarget.hasPrefixInstrs()) {
-    if (convertToNonDenormSingle(APSplatBits) &&
-        (Op->getValueType(0) == MVT::v2f64)) {
+    // Check the type first to short-circuit so we don't modify APSplatBits if
+    // this block isn't executed.
+    if ((Op->getValueType(0) == MVT::v2f64) &&
+        convertToNonDenormSingle(APSplatBits)) {
       SDValue SplatNode = DAG.getNode(
           PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
           DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
       return DAG.getBitcast(Op.getValueType(), SplatNode);
-    } else { // We may lose precision, so we have to use XXSPLTI32DX.
+    } else {
+      // We may lose precision, so we have to use XXSPLTI32DX.
 
       uint32_t Hi =
           (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 477105bd03ac..0dda2c181572 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -987,6 +987,9 @@ namespace llvm {
     shouldExpandBuildVectorWithShuffles(EVT VT,
                                         unsigned DefinedValues) const override;
 
+    // Keep the zero-extensions for arguments to libcalls.
+    bool shouldKeepZExtForFP16Conv() const override { return true; }
+
     /// createFastISel - This method returns a target-specific FastISel object,
     /// or null if the target does not support "fast" instruction selection.
     FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index e7e590153605..dcf7525d7458 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -2126,7 +2126,7 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
       if (getFeatureBits(RISCV::FeatureStdExtB))
         formalArchStr = (Twine(formalArchStr) + "_b0p93").str();
       if (getFeatureBits(RISCV::FeatureStdExtV))
-        formalArchStr = (Twine(formalArchStr) + "_v1p0").str();
+        formalArchStr = (Twine(formalArchStr) + "_v0p10").str();
       if (getFeatureBits(RISCV::FeatureExtZfh))
         formalArchStr = (Twine(formalArchStr) + "_zfh0p1").str();
       if (getFeatureBits(RISCV::FeatureExtZba))
@@ -2152,9 +2152,9 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
       if (getFeatureBits(RISCV::FeatureExtZbt))
         formalArchStr = (Twine(formalArchStr) + "_zbt0p93").str();
       if (getFeatureBits(RISCV::FeatureExtZvamo))
-        formalArchStr = (Twine(formalArchStr) + "_zvamo1p0").str();
+        formalArchStr = (Twine(formalArchStr) + "_zvamo0p10").str();
       if (getFeatureBits(RISCV::FeatureStdExtZvlsseg))
-        formalArchStr = (Twine(formalArchStr) + "_zvlsseg1p0").str();
+        formalArchStr = (Twine(formalArchStr) + "_zvlsseg0p10").str();
 
       getTargetStreamer().emitTextAttribute(Tag, formalArchStr);
     }
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
index 72434a15bedb..13c4b84aa300 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
@@ -63,7 +63,7 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
   if (STI.hasFeature(RISCV::FeatureStdExtB))
     Arch += "_b0p93";
   if (STI.hasFeature(RISCV::FeatureStdExtV))
-    Arch += "_v1p0";
+    Arch += "_v0p10";
   if (STI.hasFeature(RISCV::FeatureExtZfh))
     Arch += "_zfh0p1";
   if (STI.hasFeature(RISCV::FeatureExtZba))
@@ -89,9 +89,9 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
   if (STI.hasFeature(RISCV::FeatureExtZbt))
     Arch += "_zbt0p93";
   if (STI.hasFeature(RISCV::FeatureExtZvamo))
-    Arch += "_zvamo1p0";
+    Arch += "_zvamo0p10";
   if (STI.hasFeature(RISCV::FeatureStdExtZvlsseg))
-    Arch += "_zvlsseg1p0";
+    Arch += "_zvlsseg0p10";
 
   emitTextAttribute(RISCVAttrs::ARCH, Arch);
 }
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp
index 6a12f99b8903..ae32cbd1ae59 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp
@@ -59,7 +59,8 @@ bool RISCVCleanupVSETVLI::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
   for (auto MII = MBB.begin(), MIE = MBB.end(); MII != MIE;) {
     MachineInstr &MI = *MII++;
 
-    if (MI.getOpcode() != RISCV::PseudoVSETVLI) {
+    if (MI.getOpcode() != RISCV::PseudoVSETVLI &&
+        MI.getOpcode() != RISCV::PseudoVSETIVLI) {
       if (PrevVSETVLI &&
           (MI.isCall() || MI.modifiesRegister(RISCV::VL) ||
            MI.modifiesRegister(RISCV::VTYPE))) {
@@ -69,26 +70,48 @@ bool RISCVCleanupVSETVLI::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
       continue;
     }
 
-    // If we don't have a previous VSETVLI or the VL output isn't dead, we
+    // If we don't have a previous VSET{I}VLI or the VL output isn't dead, we
     // can't remove this VSETVLI.
     if (!PrevVSETVLI || !MI.getOperand(0).isDead()) {
       PrevVSETVLI = &MI;
       continue;
     }
 
-    Register PrevAVLReg = PrevVSETVLI->getOperand(1).getReg();
-    Register AVLReg = MI.getOperand(1).getReg();
+    // If a previous "set vl" instruction opcode is different from this one, we
+    // can't differentiate the AVL values.
+    if (PrevVSETVLI->getOpcode() != MI.getOpcode()) {
+      PrevVSETVLI = &MI;
+      continue;
+    }
+
+    // The remaining two cases are
+    // 1. PrevVSETVLI = PseudoVSETVLI
+    //    MI = PseudoVSETVLI
+    //
+    // 2. PrevVSETVLI = PseudoVSETIVLI
+    //    MI = PseudoVSETIVLI
+    Register AVLReg;
+    bool SameAVL = false;
+    if (MI.getOpcode() == RISCV::PseudoVSETVLI) {
+      AVLReg = MI.getOperand(1).getReg();
+      SameAVL = PrevVSETVLI->getOperand(1).getReg() == AVLReg;
+    } else { // RISCV::PseudoVSETIVLI
+      SameAVL =
+          PrevVSETVLI->getOperand(1).getImm() == MI.getOperand(1).getImm();
+    }
     int64_t PrevVTYPEImm = PrevVSETVLI->getOperand(2).getImm();
     int64_t VTYPEImm = MI.getOperand(2).getImm();
 
-    // Does this VSETVLI use the same AVL register and VTYPE immediate?
-    if (PrevAVLReg != AVLReg || PrevVTYPEImm != VTYPEImm) {
+    // Does this VSET{I}VLI use the same AVL register/value and VTYPE immediate?
+    if (!SameAVL || PrevVTYPEImm != VTYPEImm) {
       PrevVSETVLI = &MI;
       continue;
     }
 
     // If the AVLReg is X0 we need to look at the output VL of both VSETVLIs.
-    if (AVLReg == RISCV::X0) {
+    if ((MI.getOpcode() == RISCV::PseudoVSETVLI) && (AVLReg == RISCV::X0)) {
+      assert((PrevVSETVLI->getOpcode() == RISCV::PseudoVSETVLI) &&
+             "Unexpected vsetvli opcode.");
       Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg();
       Register OutVL = MI.getOperand(0).getReg();
       // We can't remove if the previous VSETVLI left VL unchanged and the
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 5f50892ca886..ec9a39569952 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -103,6 +103,7 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
   case RISCV::PseudoLA_TLS_GD:
     return expandLoadTLSGDAddress(MBB, MBBI, NextMBBI);
   case RISCV::PseudoVSETVLI:
+  case RISCV::PseudoVSETIVLI:
     return expandVSetVL(MBB, MBBI);
   case RISCV::PseudoVMCLR_M_B1:
   case RISCV::PseudoVMCLR_M_B2:
@@ -217,9 +218,15 @@ bool RISCVExpandPseudo::expandVSetVL(MachineBasicBlock &MBB,
 
   DebugLoc DL = MBBI->getDebugLoc();
 
-  assert(MBBI->getOpcode() == RISCV::PseudoVSETVLI &&
+  assert((MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
+          MBBI->getOpcode() == RISCV::PseudoVSETIVLI) &&
          "Unexpected pseudo instruction");
-  const MCInstrDesc &Desc = TII->get(RISCV::VSETVLI);
+  unsigned Opcode;
+  if (MBBI->getOpcode() == RISCV::PseudoVSETVLI)
+    Opcode = RISCV::VSETVLI;
+  else
+    Opcode = RISCV::VSETIVLI;
+  const MCInstrDesc &Desc = TII->get(Opcode);
   assert(Desc.getNumOperands() == 3 && "Unexpected instruction format");
 
   Register DstReg = MBBI->getOperand(0).getReg();
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 7b0f38671f06..43bf16c53a62 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -569,12 +569,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
 
       SDValue VLOperand = Node->getOperand(2);
       if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
-        if (C->isNullValue()) {
-          VLOperand = SDValue(
-              CurDAG->getMachineNode(RISCV::ADDI, DL, XLenVT,
-                                     CurDAG->getRegister(RISCV::X0, XLenVT),
-                                     CurDAG->getTargetConstant(0, DL, XLenVT)),
-              0);
+        uint64_t AVL = C->getZExtValue();
+        if (isUInt<5>(AVL)) {
+          SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
+          ReplaceNode(Node,
+                      CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, XLenVT,
+                                             MVT::Other, VLImm, VTypeIOp,
+                                             /* Chain */ Node->getOperand(0)));
+          return;
         }
       }
 
@@ -824,93 +826,6 @@ bool RISCVDAGToDAGISel::MatchSRLIW(SDNode *N) const {
   return (Mask | maskTrailingOnes<uint64_t>(ShAmt)) == 0xffffffff;
 }
 
-// Check that it is a SLOI (Shift Left Ones Immediate). A PatFrag has already
-// determined it has the right structure:
-//
-//  (OR (SHL RS1, VC2), VC1)
-//
-// Check that VC1, the mask used to fill with ones, is compatible
-// with VC2, the shamt:
-//
-//  VC1 == maskTrailingOnes(VC2)
-//
-bool RISCVDAGToDAGISel::MatchSLOI(SDNode *N) const {
-  assert(N->getOpcode() == ISD::OR);
-  assert(N->getOperand(0).getOpcode() == ISD::SHL);
-  assert(isa<ConstantSDNode>(N->getOperand(1)));
-  assert(isa<ConstantSDNode>(N->getOperand(0).getOperand(1)));
-
-  SDValue Shl = N->getOperand(0);
-  if (Subtarget->is64Bit()) {
-    uint64_t VC1 = N->getConstantOperandVal(1);
-    uint64_t VC2 = Shl.getConstantOperandVal(1);
-    return VC1 == maskTrailingOnes<uint64_t>(VC2);
-  }
-
-  uint32_t VC1 = N->getConstantOperandVal(1);
-  uint32_t VC2 = Shl.getConstantOperandVal(1);
-  return VC1 == maskTrailingOnes<uint32_t>(VC2);
-}
-
-// Check that it is a SROI (Shift Right Ones Immediate). A PatFrag has already
-// determined it has the right structure:
-//
-//  (OR (SRL RS1, VC2), VC1)
-//
-// Check that VC1, the mask used to fill with ones, is compatible
-// with VC2, the shamt:
-//
-//  VC1 == maskLeadingOnes(VC2)
-//
-bool RISCVDAGToDAGISel::MatchSROI(SDNode *N) const {
-  assert(N->getOpcode() == ISD::OR);
-  assert(N->getOperand(0).getOpcode() == ISD::SRL);
-  assert(isa<ConstantSDNode>(N->getOperand(1)));
-  assert(isa<ConstantSDNode>(N->getOperand(0).getOperand(1)));
-
-  SDValue Srl = N->getOperand(0);
-  if (Subtarget->is64Bit()) {
-    uint64_t VC1 = N->getConstantOperandVal(1);
-    uint64_t VC2 = Srl.getConstantOperandVal(1);
-    return VC1 == maskLeadingOnes<uint64_t>(VC2);
-  }
-
-  uint32_t VC1 = N->getConstantOperandVal(1);
-  uint32_t VC2 = Srl.getConstantOperandVal(1);
-  return VC1 == maskLeadingOnes<uint32_t>(VC2);
-}
-
-// Check that it is a SROIW (Shift Right Ones Immediate i32 on RV64). A PatFrag
-// has already determined it has the right structure:
-//
-//  (OR (SRL RS1, VC2), VC1)
-//
-// and then we check that VC1, the mask used to fill with ones, is compatible
-// with VC2, the shamt:
-//
-//  VC2 < 32
-//  VC1 == maskTrailingZeros<uint64_t>(32 - VC2)
-//
-bool RISCVDAGToDAGISel::MatchSROIW(SDNode *N) const {
-  assert(N->getOpcode() == ISD::OR);
-  assert(N->getOperand(0).getOpcode() == ISD::SRL);
-  assert(isa<ConstantSDNode>(N->getOperand(1)));
-  assert(isa<ConstantSDNode>(N->getOperand(0).getOperand(1)));
-
-  // The IsRV64 predicate is checked after PatFrag predicates so we can get
-  // here even on RV32.
-  if (!Subtarget->is64Bit())
-    return false;
-
-  SDValue Srl = N->getOperand(0);
-  uint64_t VC1 = N->getConstantOperandVal(1);
-  uint64_t VC2 = Srl.getConstantOperandVal(1);
-
-  // Immediate range should be enforced by uimm5 predicate.
-  assert(VC2 < 32 && "Unexpected immediate");
-  return VC1 == maskTrailingZeros<uint64_t>(32 - VC2);
-}
-
 // Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
 // on RV64).
 // SLLIUW is the same as SLLI except for the fact that it clears the bits
@@ -946,6 +861,23 @@ bool RISCVDAGToDAGISel::MatchSLLIUW(SDNode *N) const {
   return (VC1 >> VC2) == UINT64_C(0xFFFFFFFF);
 }
 
+// X0 has special meaning for vsetvl/vsetvli.
+//  rd | rs1 |   AVL value | Effect on vl
+//--------------------------------------------------------------
+// !X0 |  X0 |       VLMAX | Set vl to VLMAX
+//  X0 |  X0 | Value in vl | Keep current vl, just change vtype.
+bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
+  // If the VL value is a constant 0, manually select it to an ADDI with 0
+  // immediate to prevent the default selection path from matching it to X0.
+  auto *C = dyn_cast<ConstantSDNode>(N);
+  if (C && C->isNullValue())
+    VL = SDValue(selectImm(CurDAG, SDLoc(N), 0, Subtarget->getXLenVT()), 0);
+  else
+    VL = N;
+
+  return true;
+}
+
 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
   if (N.getOpcode() != ISD::SPLAT_VECTOR &&
       N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64)
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 23601c3b8f06..6099586d049d 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -46,11 +46,10 @@ public:
   bool SelectAddrFI(SDValue Addr, SDValue &Base);
 
   bool MatchSRLIW(SDNode *N) const;
-  bool MatchSLOI(SDNode *N) const;
-  bool MatchSROI(SDNode *N) const;
-  bool MatchSROIW(SDNode *N) const;
   bool MatchSLLIUW(SDNode *N) const;
 
+  bool selectVLOp(SDValue N, SDValue &VL);
+
   bool selectVSplat(SDValue N, SDValue &SplatVal);
   bool selectVSplatSimm5(SDValue N, SDValue &SplatVal);
   bool selectVSplatUimm5(SDValue N, SDValue &SplatVal);
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td
index 147993127e78..80f46b73bfd7 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td
@@ -38,9 +38,11 @@ class RISCVLSUMOP<bits<5> val> {
   bits<5> Value = val;
 }
 def LUMOPUnitStride  : RISCVLSUMOP<0b00000>;
+def LUMOPUnitStrideMask : RISCVLSUMOP<0b01011>;
 def LUMOPUnitStrideWholeReg : RISCVLSUMOP<0b01000>;
 def LUMOPUnitStrideFF: RISCVLSUMOP<0b10000>;
 def SUMOPUnitStride  : RISCVLSUMOP<0b00000>;
+def SUMOPUnitStrideMask : RISCVLSUMOP<0b01011>;
 def SUMOPUnitStrideWholeReg : RISCVLSUMOP<0b01000>;
 
 class RISCVAMOOP<bits<5> val> {
@@ -63,10 +65,23 @@ def LSWidth8     : RISCVWidth<0b0000>;
 def LSWidth16    : RISCVWidth<0b0101>;
 def LSWidth32    : RISCVWidth<0b0110>;
 def LSWidth64    : RISCVWidth<0b0111>;
-def LSWidth128   : RISCVWidth<0b1000>;
-def LSWidth256   : RISCVWidth<0b1101>;
-def LSWidth512   : RISCVWidth<0b1110>;
-def LSWidth1024  : RISCVWidth<0b1111>;
+
+class RVInstSetiVLi<dag outs, dag ins, string opcodestr, string argstr>
+    : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+  bits<5> uimm;
+  bits<5> rd;
+  bits<10> vtypei;
+
+  let Inst{31} = 1;
+  let Inst{30} = 1;
+  let Inst{29-20} = vtypei{9-0};
+  let Inst{19-15} = uimm;
+  let Inst{14-12} = 0b111;
+  let Inst{11-7} = rd;
+  let Opcode = OPC_OP_V.Value;
+
+  let Defs = [VTYPE, VL];
+}
 
 class RVInstSetVLi<dag outs, dag ins, string opcodestr, string argstr>
     : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
index 1bc288b5177c..7888ac7bac8e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -45,25 +45,6 @@ def shfl_uimm : Operand<XLenVT>, ImmLeaf<XLenVT, [{
   }];
 }
 
-
-// Check that it is a SLOI (Shift Left Ones Immediate).
-def SLOIPat : PatFrag<(ops node:$A, node:$B),
-                      (or (shl node:$A, node:$B), imm), [{
-  return MatchSLOI(N);
-}]>;
-
-// Check that it is a SROI (Shift Right Ones Immediate).
-def SROIPat : PatFrag<(ops node:$A, node:$B),
-                      (or (srl node:$A, node:$B), imm), [{
-  return MatchSROI(N);
-}]>;
-
-// Check that it is a SROIW (Shift Right Ones Immediate i32 on RV64).
-def SROIWPat : PatFrag<(ops node:$A, node:$B),
-                       (or (srl node:$A, node:$B), imm), [{
-  return MatchSROIW(N);
-}]>;
-
 // Checks if this mask has a single 0 bit and cannot be used with ANDI.
 def BCLRMask : ImmLeaf<XLenVT, [{
   if (Subtarget->is64Bit())
@@ -210,11 +191,6 @@ def SH2ADD : ALU_rr<0b0010000, 0b100, "sh2add">, Sched<[]>;
 def SH3ADD : ALU_rr<0b0010000, 0b110, "sh3add">, Sched<[]>;
 } // Predicates = [HasStdExtZba]
 
-let Predicates = [HasStdExtZbp] in {
-def SLO  : ALU_rr<0b0010000, 0b001, "slo">, Sched<[]>;
-def SRO  : ALU_rr<0b0010000, 0b101, "sro">, Sched<[]>;
-} // Predicates = [HasStdExtZbp]
-
 let Predicates = [HasStdExtZbbOrZbp] in {
 def ROL   : ALU_rr<0b0110000, 0b001, "rol">, Sched<[]>;
 def ROR   : ALU_rr<0b0110000, 0b101, "ror">, Sched<[]>;
@@ -238,11 +214,6 @@ def XPERMB : ALU_rr<0b0010100, 0b100, "xperm.b">, Sched<[]>;
 def XPERMH : ALU_rr<0b0010100, 0b110, "xperm.h">, Sched<[]>;
 } // Predicates = [HasStdExtZbp]
 
-let Predicates = [HasStdExtZbp] in {
-def SLOI : RVBShift_ri<0b00100, 0b001, OPC_OP_IMM, "sloi">, Sched<[]>;
-def SROI : RVBShift_ri<0b00100, 0b101, OPC_OP_IMM, "sroi">, Sched<[]>;
-} // Predicates = [HasStdExtZbp]
-
 let Predicates = [HasStdExtZbbOrZbp] in
 def RORI  : RVBShift_ri<0b01100, 0b101, OPC_OP_IMM, "rori">, Sched<[]>;
 
@@ -369,11 +340,6 @@ def SH2ADDUW : ALUW_rr<0b0010000, 0b100, "sh2add.uw">, Sched<[]>;
 def SH3ADDUW : ALUW_rr<0b0010000, 0b110, "sh3add.uw">, Sched<[]>;
 } // Predicates = [HasStdExtZbb, IsRV64]
 
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def SLOW   : ALUW_rr<0b0010000, 0b001, "slow">, Sched<[]>;
-def SROW   : ALUW_rr<0b0010000, 0b101, "srow">, Sched<[]>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
 let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
 def ROLW  : ALUW_rr<0b0110000, 0b001, "rolw">, Sched<[]>;
 def RORW  : ALUW_rr<0b0110000, 0b101, "rorw">, Sched<[]>;
@@ -395,11 +361,6 @@ let Predicates = [HasStdExtZbp, IsRV64] in {
 def XPERMW : ALU_rr<0b0010100, 0b000, "xperm.w">, Sched<[]>;
 } // Predicates = [HasStdExtZbp, IsRV64]
 
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def SLOIW  : RVBShiftW_ri<0b0010000, 0b001, OPC_OP_IMM_32, "sloiw">, Sched<[]>;
-def SROIW  : RVBShiftW_ri<0b0010000, 0b101, OPC_OP_IMM_32, "sroiw">, Sched<[]>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
 let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
 def RORIW : RVBShiftW_ri<0b0110000, 0b101, OPC_OP_IMM_32, "roriw">, Sched<[]>;
 
@@ -673,13 +634,6 @@ def : Pat<(or  GPR:$rs1, (not GPR:$rs2)), (ORN  GPR:$rs1, GPR:$rs2)>;
 def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>;
 } // Predicates = [HasStdExtZbbOrZbp]
 
-let Predicates = [HasStdExtZbp] in {
-def : Pat<(not (shiftop<shl> (not GPR:$rs1), GPR:$rs2)),
-          (SLO GPR:$rs1, GPR:$rs2)>;
-def : Pat<(not (shiftop<srl> (not GPR:$rs1), GPR:$rs2)),
-          (SRO GPR:$rs1, GPR:$rs2)>;
-} // Predicates = [HasStdExtZbp]
-
 let Predicates = [HasStdExtZbbOrZbp] in {
 def : Pat<(rotl GPR:$rs1, GPR:$rs2), (ROL GPR:$rs1, GPR:$rs2)>;
 def : Pat<(rotr GPR:$rs1, GPR:$rs2), (ROR GPR:$rs1, GPR:$rs2)>;
@@ -710,13 +664,6 @@ def : Pat<(and (srl GPR:$rs1, uimmlog2xlen:$shamt), (XLenVT 1)),
           (BEXTI GPR:$rs1, uimmlog2xlen:$shamt)>;
 }
 
-let Predicates = [HasStdExtZbp] in {
-def : Pat<(SLOIPat GPR:$rs1, uimmlog2xlen:$shamt),
-          (SLOI GPR:$rs1, uimmlog2xlen:$shamt)>;
-def : Pat<(SROIPat GPR:$rs1, uimmlog2xlen:$shamt),
-          (SROI GPR:$rs1, uimmlog2xlen:$shamt)>;
-} // Predicates = [HasStdExtZbp]
-
 // There's no encoding for roli in the the 'B' extension as it can be
 // implemented with rori by negating the immediate.
 let Predicates = [HasStdExtZbbOrZbp] in {
@@ -936,13 +883,6 @@ def : Pat<(add (SLLIUWPat GPR:$rs1, (XLenVT 3)), GPR:$rs2),
           (SH3ADDUW GPR:$rs1, GPR:$rs2)>;
 } // Predicates = [HasStdExtZba, IsRV64]
 
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def : Pat<(not (shiftopw<riscv_sllw> (not GPR:$rs1), GPR:$rs2)),
-          (SLOW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(not (shiftopw<riscv_srlw> (not GPR:$rs1), GPR:$rs2)),
-          (SROW GPR:$rs1, GPR:$rs2)>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
 let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
 def : Pat<(riscv_rolw GPR:$rs1, GPR:$rs2),
           (ROLW GPR:$rs1, GPR:$rs2)>;
@@ -983,13 +923,6 @@ def : Pat<(xor (assertsexti32 GPR:$rs1), BSETINVWMask:$mask),
 } // Predicates = [HasStdExtZbs, IsRV64]
 
 let Predicates = [HasStdExtZbp, IsRV64] in {
-def : Pat<(sext_inreg (SLOIPat GPR:$rs1, uimm5:$shamt), i32),
-          (SLOIW GPR:$rs1, uimm5:$shamt)>;
-def : Pat<(SROIWPat GPR:$rs1, uimm5:$shamt),
-          (SROIW GPR:$rs1, uimm5:$shamt)>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
-let Predicates = [HasStdExtZbp, IsRV64] in {
 def : Pat<(riscv_rorw (riscv_greviw GPR:$rs1, 24), (i64 16)), (GREVIW GPR:$rs1, 8)>;
 def : Pat<(riscv_rolw (riscv_greviw GPR:$rs1, 24), (i64 16)), (GREVIW GPR:$rs1, 8)>;
 def : Pat<(riscv_greviw GPR:$rs1, timm:$shamt), (GREVIW GPR:$rs1, timm:$shamt)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index 4f9e9cfbdb98..b3fc76aee161 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 ///
 /// This file describes the RISC-V instructions from the standard 'V' Vector
-/// extension, version 0.9.
+/// extension, version 0.10.
 /// This version is still experimental as the 'V' extension hasn't been
 /// ratified yet.
 ///
@@ -82,6 +82,12 @@ def simm5_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT,
 //===----------------------------------------------------------------------===//
 
 let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+// load vd, (rs1)
+class VUnitStrideLoadMask<string opcodestr>
+    : RVInstVLU<0b000, LSWidth8.Value{3}, LUMOPUnitStrideMask, LSWidth8.Value{2-0},
+                (outs VR:$vd),
+                (ins GPR:$rs1), opcodestr, "$vd, (${rs1})">;
+
 // load vd, (rs1), vm
 class VUnitStrideLoad<RISCVLSUMOP lumop, RISCVWidth width,
                       string opcodestr>
@@ -138,6 +144,12 @@ class VIndexedSegmentLoad<bits<3> nf, RISCVMOP mop, RISCVWidth width,
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
 // store vd, vs3, (rs1), vm
+class VUnitStrideStoreMask<string opcodestr>
+    : RVInstVSU<0b000, LSWidth8.Value{3}, SUMOPUnitStrideMask, LSWidth8.Value{2-0},
+                (outs), (ins VR:$vs3, GPR:$rs1), opcodestr,
+                "$vs3, (${rs1})">;
+
+// store vd, vs3, (rs1), vm
 class VUnitStrideStore<RISCVLSUMOP sumop, RISCVWidth width,
                          string opcodestr>
     : RVInstVSU<0b000, width.Value{3}, sumop, width.Value{2-0},
@@ -423,10 +435,6 @@ multiclass VWholeLoad<bits<3> nf, string opcodestr> {
   def E16_V : VWholeLoad<nf, LSWidth16, opcodestr # "e16.v">;
   def E32_V : VWholeLoad<nf, LSWidth32, opcodestr # "e32.v">;
   def E64_V : VWholeLoad<nf, LSWidth64, opcodestr # "e64.v">;
-  def E128_V : VWholeLoad<nf, LSWidth128, opcodestr # "e128.v">;
-  def E256_V : VWholeLoad<nf, LSWidth256, opcodestr # "e256.v">;
-  def E512_V : VWholeLoad<nf, LSWidth512, opcodestr # "e512.v">;
-  def E1024_V : VWholeLoad<nf, LSWidth1024, opcodestr # "e1024.v">;
 }
 
 //===----------------------------------------------------------------------===//
@@ -438,6 +446,9 @@ let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
 def VSETVLI : RVInstSetVLi<(outs GPR:$rd), (ins GPR:$rs1, VTypeIOp:$vtypei),
                            "vsetvli", "$rd, $rs1, $vtypei">;
 
+def VSETIVLI : RVInstSetiVLi<(outs GPR:$rd), (ins uimm5:$uimm, VTypeIOp:$vtypei),
+                             "vsetivli", "$rd, $uimm, $vtypei">;
+
 def VSETVL : RVInstSetVL<(outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2),
                          "vsetvl", "$rd, $rs1, $rs2">;
 } // hasSideEffects = 1, mayLoad = 0, mayStore = 0
@@ -447,47 +458,30 @@ def VLE8_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth8, "vle8.v">;
 def VLE16_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth16, "vle16.v">;
 def VLE32_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth32, "vle32.v">;
 def VLE64_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth64, "vle64.v">;
-def VLE128_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth128, "vle128.v">;
-def VLE256_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth256, "vle256.v">;
-def VLE512_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth512, "vle512.v">;
-def VLE1024_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth1024, "vle1024.v">;
 
 def VLE8FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth8, "vle8ff.v">;
 def VLE16FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth16, "vle16ff.v">;
 def VLE32FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth32, "vle32ff.v">;
 def VLE64FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth64, "vle64ff.v">;
-def VLE128FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth128, "vle128ff.v">;
-def VLE256FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth256, "vle256ff.v">;
-def VLE512FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth512, "vle512ff.v">;
-def VLE1024FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth1024, "vle1024ff.v">;
+
+def VLE1_V : VUnitStrideLoadMask<"vle1.v">;
+def VSE1_V : VUnitStrideStoreMask<"vse1.v">;
 
 def VSE8_V : VUnitStrideStore<SUMOPUnitStride, LSWidth8, "vse8.v">;
 def VSE16_V : VUnitStrideStore<SUMOPUnitStride, LSWidth16, "vse16.v">;
 def VSE32_V : VUnitStrideStore<SUMOPUnitStride, LSWidth32, "vse32.v">;
 def VSE64_V : VUnitStrideStore<SUMOPUnitStride, LSWidth64, "vse64.v">;
-def VSE128_V : VUnitStrideStore<SUMOPUnitStride, LSWidth128, "vse128.v">;
-def VSE256_V : VUnitStrideStore<SUMOPUnitStride, LSWidth256, "vse256.v">;
-def VSE512_V : VUnitStrideStore<SUMOPUnitStride, LSWidth512, "vse512.v">;
-def VSE1024_V : VUnitStrideStore<SUMOPUnitStride, LSWidth1024, "vse1024.v">;
 
 // Vector Strided Instructions
 def VLSE8_V : VStridedLoad<LSWidth8, "vlse8.v">;
 def VLSE16_V : VStridedLoad<LSWidth16, "vlse16.v">;
 def VLSE32_V : VStridedLoad<LSWidth32, "vlse32.v">;
 def VLSE64_V : VStridedLoad<LSWidth64, "vlse64.v">;
-def VLSE128_V : VStridedLoad<LSWidth128, "vlse128.v">;
-def VLSE256_V : VStridedLoad<LSWidth256, "vlse256.v">;
-def VLSE512_V : VStridedLoad<LSWidth512, "vlse512.v">;
-def VLSE1024_V : VStridedLoad<LSWidth1024, "vlse1024.v">;
 
 def VSSE8_V : VStridedStore<LSWidth8, "vsse8.v">;
 def VSSE16_V : VStridedStore<LSWidth16, "vsse16.v">;
 def VSSE32_V : VStridedStore<LSWidth32, "vsse32.v">;
 def VSSE64_V : VStridedStore<LSWidth64, "vsse64.v">;
-def VSSE128_V : VStridedStore<LSWidth128, "vsse128.v">;
-def VSSE256_V : VStridedStore<LSWidth256, "vsse256.v">;
-def VSSE512_V : VStridedStore<LSWidth512, "vsse512.v">;
-def VSSE1024_V : VStridedStore<LSWidth1024, "vsse1024.v">;
 
 // Vector Indexed Instructions
 def VLUXEI8_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth8, "vluxei8.v">;
@@ -510,19 +504,19 @@ def VSOXEI16_V : VIndexedStore<MOPSTIndexedOrder, LSWidth16, "vsoxei16.v">;
 def VSOXEI32_V : VIndexedStore<MOPSTIndexedOrder, LSWidth32, "vsoxei32.v">;
 def VSOXEI64_V : VIndexedStore<MOPSTIndexedOrder, LSWidth64, "vsoxei64.v">;
 
-defm VL1R : VWholeLoad<1, "vl1r">;
-defm VL2R : VWholeLoad<2, "vl2r">;
-defm VL4R : VWholeLoad<4, "vl4r">;
-defm VL8R : VWholeLoad<8, "vl8r">;
+defm VL1R : VWholeLoad<0, "vl1r">;
+defm VL2R : VWholeLoad<1, "vl2r">;
+defm VL4R : VWholeLoad<3, "vl4r">;
+defm VL8R : VWholeLoad<7, "vl8r">;
 def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VR:$vd, GPR:$rs1)>;
 def : InstAlias<"vl2r.v $vd, (${rs1})", (VL2RE8_V VR:$vd, GPR:$rs1)>;
 def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VR:$vd, GPR:$rs1)>;
 def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VR:$vd, GPR:$rs1)>;
 
-def VS1R_V : VWholeStore<1, "vs1r.v">;
-def VS2R_V : VWholeStore<2, "vs2r.v">;
-def VS4R_V : VWholeStore<4, "vs4r.v">;
-def VS8R_V : VWholeStore<8, "vs8r.v">;
+def VS1R_V : VWholeStore<0, "vs1r.v">;
+def VS2R_V : VWholeStore<1, "vs2r.v">;
+def VS4R_V : VWholeStore<3, "vs4r.v">;
+def VS8R_V : VWholeStore<7, "vs8r.v">;
 
 // Vector Single-Width Integer Add and Subtract
 defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>;
@@ -806,8 +800,8 @@ defm VFWNMSAC_V : VALUr_FV_V_F<"vfwnmsac", 0b111111>;
 
 // Vector Floating-Point Square-Root Instruction
 defm VFSQRT_V : VALU_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>;
-defm VFRSQRTE7_V : VALU_FV_VS2<"vfrsqrte7.v", 0b010011, 0b00100>;
-defm VFRECE7_V : VALU_FV_VS2<"vfrece7.v", 0b010011, 0b00101>;
+defm VFRSQRT7_V : VALU_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>;
+defm VFREC7_V : VALU_FV_VS2<"vfrec7.v", 0b010011, 0b00101>;
 
 // Vector Floating-Point MIN/MAX Instructions
 defm VFMIN_V : VALU_FV_V_F<"vfmin", 0b000100>;
@@ -1058,47 +1052,27 @@ let Predicates = [HasStdExtZvlsseg] in {
     def VLSEG#nf#E16_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth16, "vlseg"#nf#"e16.v">;
     def VLSEG#nf#E32_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth32, "vlseg"#nf#"e32.v">;
     def VLSEG#nf#E64_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth64, "vlseg"#nf#"e64.v">;
-    def VLSEG#nf#E128_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth128, "vlseg"#nf#"e128.v">;
-    def VLSEG#nf#E256_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth256, "vlseg"#nf#"e256.v">;
-    def VLSEG#nf#E512_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth512, "vlseg"#nf#"e512.v">;
-    def VLSEG#nf#E1024_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth1024, "vlseg"#nf#"e1024.v">;
 
     def VLSEG#nf#E8FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth8, "vlseg"#nf#"e8ff.v">;
     def VLSEG#nf#E16FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth16, "vlseg"#nf#"e16ff.v">;
     def VLSEG#nf#E32FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth32, "vlseg"#nf#"e32ff.v">;
     def VLSEG#nf#E64FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth64, "vlseg"#nf#"e64ff.v">;
-    def VLSEG#nf#E128FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth128, "vlseg"#nf#"e128ff.v">;
-    def VLSEG#nf#E256FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth256, "vlseg"#nf#"e256ff.v">;
-    def VLSEG#nf#E512FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth512, "vlseg"#nf#"e512ff.v">;
-    def VLSEG#nf#E1024FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth1024, "vlseg"#nf#"e1024ff.v">;
 
     def VSSEG#nf#E8_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth8, "vsseg"#nf#"e8.v">;
     def VSSEG#nf#E16_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth16, "vsseg"#nf#"e16.v">;
     def VSSEG#nf#E32_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth32, "vsseg"#nf#"e32.v">;
     def VSSEG#nf#E64_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth64, "vsseg"#nf#"e64.v">;
-    def VSSEG#nf#E128_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth128, "vsseg"#nf#"e128.v">;
-    def VSSEG#nf#E256_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth256, "vsseg"#nf#"e256.v">;
-    def VSSEG#nf#E512_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth512, "vsseg"#nf#"e512.v">;
-    def VSSEG#nf#E1024_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth1024, "vsseg"#nf#"e1024.v">;
 
     // Vector Strided Instructions
     def VLSSEG#nf#E8_V : VStridedSegmentLoad<!add(nf, -1), LSWidth8, "vlsseg"#nf#"e8.v">;
     def VLSSEG#nf#E16_V : VStridedSegmentLoad<!add(nf, -1), LSWidth16, "vlsseg"#nf#"e16.v">;
     def VLSSEG#nf#E32_V : VStridedSegmentLoad<!add(nf, -1), LSWidth32, "vlsseg"#nf#"e32.v">;
     def VLSSEG#nf#E64_V : VStridedSegmentLoad<!add(nf, -1), LSWidth64, "vlsseg"#nf#"e64.v">;
-    def VLSSEG#nf#E128_V : VStridedSegmentLoad<!add(nf, -1), LSWidth128, "vlsseg"#nf#"e128.v">;
-    def VLSSEG#nf#E256_V : VStridedSegmentLoad<!add(nf, -1), LSWidth256, "vlsseg"#nf#"e256.v">;
-    def VLSSEG#nf#E512_V : VStridedSegmentLoad<!add(nf, -1), LSWidth512, "vlsseg"#nf#"e512.v">;
-    def VLSSEG#nf#E1024_V : VStridedSegmentLoad<!add(nf, -1), LSWidth1024, "vlsseg"#nf#"e1024.v">;
 
     def VSSSEG#nf#E8_V : VStridedSegmentStore<!add(nf, -1), LSWidth8, "vssseg"#nf#"e8.v">;
     def VSSSEG#nf#E16_V : VStridedSegmentStore<!add(nf, -1), LSWidth16, "vssseg"#nf#"e16.v">;
     def VSSSEG#nf#E32_V : VStridedSegmentStore<!add(nf, -1), LSWidth32, "vssseg"#nf#"e32.v">;
     def VSSSEG#nf#E64_V : VStridedSegmentStore<!add(nf, -1), LSWidth64, "vssseg"#nf#"e64.v">;
-    def VSSSEG#nf#E128_V : VStridedSegmentStore<!add(nf, -1), LSWidth128, "vssseg"#nf#"e128.v">;
-    def VSSSEG#nf#E256_V : VStridedSegmentStore<!add(nf, -1), LSWidth256, "vssseg"#nf#"e256.v">;
-    def VSSSEG#nf#E512_V : VStridedSegmentStore<!add(nf, -1), LSWidth512, "vssseg"#nf#"e512.v">;
-    def VSSSEG#nf#E1024_V : VStridedSegmentStore<!add(nf, -1), LSWidth1024, "vssseg"#nf#"e1024.v">;
 
     // Vector Indexed Instructions
     def VLUXSEG#nf#EI8_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
@@ -1109,14 +1083,6 @@ let Predicates = [HasStdExtZvlsseg] in {
                               LSWidth32, "vluxseg"#nf#"ei32.v">;
     def VLUXSEG#nf#EI64_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
                               LSWidth64, "vluxseg"#nf#"ei64.v">;
-    def VLUXSEG#nf#EI128_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
-                               LSWidth128, "vluxseg"#nf#"ei128.v">;
-    def VLUXSEG#nf#EI256_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
-                               LSWidth256, "vluxseg"#nf#"ei256.v">;
-    def VLUXSEG#nf#EI512_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
-                               LSWidth512, "vluxseg"#nf#"ei512.v">;
-    def VLUXSEG#nf#EI1024_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
-                               LSWidth1024, "vluxseg"#nf#"ei1024.v">;
 
     def VLOXSEG#nf#EI8_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
                              LSWidth8, "vloxseg"#nf#"ei8.v">;
@@ -1126,14 +1092,6 @@ let Predicates = [HasStdExtZvlsseg] in {
                               LSWidth32, "vloxseg"#nf#"ei32.v">;
     def VLOXSEG#nf#EI64_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
                               LSWidth64, "vloxseg"#nf#"ei64.v">;
-    def VLOXSEG#nf#EI128_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
-                               LSWidth128, "vloxseg"#nf#"ei128.v">;
-    def VLOXSEG#nf#EI256_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
-                               LSWidth256, "vloxseg"#nf#"ei256.v">;
-    def VLOXSEG#nf#EI512_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
-                               LSWidth512, "vloxseg"#nf#"ei512.v">;
-    def VLOXSEG#nf#EI1024_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
-                               LSWidth1024, "vloxseg"#nf#"ei1024.v">;
 
     def VSUXSEG#nf#EI8_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
                              LSWidth8, "vsuxseg"#nf#"ei8.v">;
@@ -1143,14 +1101,6 @@ let Predicates = [HasStdExtZvlsseg] in {
                               LSWidth32, "vsuxseg"#nf#"ei32.v">;
     def VSUXSEG#nf#EI64_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
                               LSWidth64, "vsuxseg"#nf#"ei64.v">;
-    def VSUXSEG#nf#EI128_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
-                               LSWidth128, "vsuxseg"#nf#"ei128.v">;
-    def VSUXSEG#nf#EI256_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
-                               LSWidth256, "vsuxseg"#nf#"ei256.v">;
-    def VSUXSEG#nf#EI512_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
-                               LSWidth512, "vsuxseg"#nf#"ei512.v">;
-    def VSUXSEG#nf#EI1024_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
-                                LSWidth1024, "vsuxseg"#nf#"ei1024.v">;
 
     def VSOXSEG#nf#EI8_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
                              LSWidth8, "vsoxseg"#nf#"ei8.v">;
@@ -1160,14 +1110,6 @@ let Predicates = [HasStdExtZvlsseg] in {
                               LSWidth32, "vsoxseg"#nf#"ei32.v">;
     def VSOXSEG#nf#EI64_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
                               LSWidth64, "vsoxseg"#nf#"ei64.v">;
-    def VSOXSEG#nf#EI128_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
-                               LSWidth128, "vsoxseg"#nf#"ei128.v">;
-    def VSOXSEG#nf#EI256_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
-                               LSWidth256, "vsoxseg"#nf#"ei256.v">;
-    def VSOXSEG#nf#EI512_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
-                               LSWidth512, "vsoxseg"#nf#"ei512.v">;
-    def VSOXSEG#nf#EI1024_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
-                                LSWidth1024, "vsoxseg"#nf#"ei1024.v">;
   }
 } // Predicates = [HasStdExtZvlsseg]
 
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 06e4d053d5d7..60bd1b24cab8 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 ///
 /// This file contains the required infrastructure to support code generation
-/// for the standard 'V' (Vector) extension, version 0.9.  This version is still
+/// for the standard 'V' (Vector) extension, version 0.10.  This version is still
 /// experimental as the 'V' extension hasn't been ratified yet.
 ///
 /// This file is included from RISCVInstrInfoV.td
@@ -42,17 +42,7 @@ def riscv_read_vl : SDNode<"RISCVISD::READ_VL",
 //--------------------------------------------------------------
 // !X0 |  X0 |       VLMAX | Set vl to VLMAX
 //  X0 |  X0 | Value in vl | Keep current vl, just change vtype.
-def NoX0 : SDNodeXForm<undef,
-[{
-  auto *C = dyn_cast<ConstantSDNode>(N);
-  if (C && C->isNullValue()) {
-    SDLoc DL(N);
-    return SDValue(CurDAG->getMachineNode(RISCV::ADDI, DL, Subtarget->getXLenVT(),
-                   CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()),
-                   CurDAG->getTargetConstant(0, DL, Subtarget->getXLenVT())), 0);
-  }
-  return SDValue(N, 0);
-}]>;
+def VLOp : ComplexPattern<XLenVT, 1, "selectVLOp">;
 
 def DecImm : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getSExtValue() - 1, SDLoc(N),
@@ -1228,6 +1218,14 @@ multiclass VPseudoUSLoad {
   }
 }
 
+multiclass VPseudoLoadMask {
+  foreach mti = AllMasks in {
+    let VLMul = mti.LMul.value in {
+      def "_V_" # mti.BX : VPseudoUSLoadNoMask<VR>;
+    }
+  }
+}
+
 multiclass VPseudoSLoad {
   foreach lmul = MxList.m in {
     defvar LInfo = lmul.MX;
@@ -1264,6 +1262,14 @@ multiclass VPseudoUSStore {
   }
 }
 
+multiclass VPseudoStoreMask {
+  foreach mti = AllMasks in {
+    let VLMul = mti.LMul.value in {
+      def "_V_" # mti.BX : VPseudoUSStoreNoMask<VR>;
+    }
+  }
+}
+
 multiclass VPseudoSStore {
   foreach lmul = MxList.m in {
     defvar LInfo = lmul.MX;
@@ -1951,10 +1957,10 @@ class VPatUnaryNoMask<string intrinsic_name,
                       VReg op2_reg_class> :
   Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
                    (op2_type op2_reg_class:$rs2),
-                   (XLenVT GPR:$vl))),
+                   (XLenVT (VLOp GPR:$vl)))),
                    (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
                    (op2_type op2_reg_class:$rs2),
-                   (NoX0 GPR:$vl), sew)>;
+                   GPR:$vl, sew)>;
 
 class VPatUnaryMask<string intrinsic_name,
                     string inst,
@@ -1970,21 +1976,21 @@ class VPatUnaryMask<string intrinsic_name,
                    (result_type result_reg_class:$merge),
                    (op2_type op2_reg_class:$rs2),
                    (mask_type V0),
-                   (XLenVT GPR:$vl))),
+                   (XLenVT (VLOp GPR:$vl)))),
                    (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_MASK")
                    (result_type result_reg_class:$merge),
                    (op2_type op2_reg_class:$rs2),
-                   (mask_type V0), (NoX0 GPR:$vl), sew)>;
+                   (mask_type V0), GPR:$vl, sew)>;
 
 class VPatMaskUnaryNoMask<string intrinsic_name,
                           string inst,
                           MTypeInfo mti> :
   Pat<(mti.Mask (!cast<Intrinsic>(intrinsic_name)
                 (mti.Mask VR:$rs2),
-                (XLenVT GPR:$vl))),
+                (XLenVT (VLOp GPR:$vl)))),
                 (!cast<Instruction>(inst#"_M_"#mti.BX)
                 (mti.Mask VR:$rs2),
-                (NoX0 GPR:$vl), mti.SEW)>;
+                GPR:$vl, mti.SEW)>;
 
 class VPatMaskUnaryMask<string intrinsic_name,
                         string inst,
@@ -1993,11 +1999,11 @@ class VPatMaskUnaryMask<string intrinsic_name,
                 (mti.Mask VR:$merge),
                 (mti.Mask VR:$rs2),
                 (mti.Mask V0),
-                (XLenVT GPR:$vl))),
+                (XLenVT (VLOp GPR:$vl)))),
                 (!cast<Instruction>(inst#"_M_"#mti.BX#"_MASK")
                 (mti.Mask VR:$merge),
                 (mti.Mask VR:$rs2),
-                (mti.Mask V0), (NoX0 GPR:$vl), mti.SEW)>;
+                (mti.Mask V0), GPR:$vl, mti.SEW)>;
 
 class VPatUnaryAnyMask<string intrinsic,
                        string inst,
@@ -2013,12 +2019,12 @@ class VPatUnaryAnyMask<string intrinsic,
                    (result_type result_reg_class:$merge),
                    (op1_type op1_reg_class:$rs1),
                    (mask_type VR:$rs2),
-                   (XLenVT GPR:$vl))),
+                   (XLenVT (VLOp GPR:$vl)))),
                    (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
                    (result_type result_reg_class:$merge),
                    (op1_type op1_reg_class:$rs1),
                    (mask_type VR:$rs2),
-                   (NoX0 GPR:$vl), sew)>;
+                   GPR:$vl, sew)>;
 
 class VPatBinaryNoMask<string intrinsic_name,
                        string inst,
@@ -2031,11 +2037,11 @@ class VPatBinaryNoMask<string intrinsic_name,
   Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
                    (op1_type op1_reg_class:$rs1),
                    (op2_type op2_kind:$rs2),
-                   (XLenVT GPR:$vl))),
+                   (XLenVT (VLOp GPR:$vl)))),
                    (!cast<Instruction>(inst)
                    (op1_type op1_reg_class:$rs1),
                    (op2_type op2_kind:$rs2),
-                   (NoX0 GPR:$vl), sew)>;
+                   GPR:$vl, sew)>;
 
 class VPatBinaryMask<string intrinsic_name,
                      string inst,
@@ -2052,12 +2058,12 @@ class VPatBinaryMask<string intrinsic_name,
                    (op1_type op1_reg_class:$rs1),
                    (op2_type op2_kind:$rs2),
                    (mask_type V0),
-                   (XLenVT GPR:$vl))),
+                   (XLenVT (VLOp GPR:$vl)))),
                    (!cast<Instruction>(inst#"_MASK")
                    (result_type result_reg_class:$merge),
                    (op1_type op1_reg_class:$rs1),
                    (op2_type op2_kind:$rs2),
-                   (mask_type V0), (NoX0 GPR:$vl), sew)>;
+                   (mask_type V0), GPR:$vl, sew)>;
 
 class VPatTernaryNoMask<string intrinsic,
                         string inst,
@@ -2075,12 +2081,12 @@ class VPatTernaryNoMask<string intrinsic,
                     (result_type result_reg_class:$rs3),
                     (op1_type op1_reg_class:$rs1),
                     (op2_type op2_kind:$rs2),
-                    (XLenVT GPR:$vl))),
+                    (XLenVT (VLOp GPR:$vl)))),
                    (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
                     result_reg_class:$rs3,
                     (op1_type op1_reg_class:$rs1),
                     op2_kind:$rs2,
-                    (NoX0 GPR:$vl), sew)>;
+                    GPR:$vl, sew)>;
 
 class VPatTernaryMask<string intrinsic,
                       string inst,
@@ -2099,13 +2105,13 @@ class VPatTernaryMask<string intrinsic,
                     (op1_type op1_reg_class:$rs1),
                     (op2_type op2_kind:$rs2),
                     (mask_type V0),
-                    (XLenVT GPR:$vl))),
+                    (XLenVT (VLOp GPR:$vl)))),
                    (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX # "_MASK")
                     result_reg_class:$rs3,
                     (op1_type op1_reg_class:$rs1),
                     op2_kind:$rs2,
                     (mask_type V0),
-                    (NoX0 GPR:$vl), sew)>;
+                    GPR:$vl, sew)>;
 
 class VPatAMOWDNoMask<string intrinsic_name,
                     string inst,
@@ -2119,10 +2125,10 @@ class VPatAMOWDNoMask<string intrinsic_name,
                     GPR:$rs1,
                     (op1_type op1_reg_class:$vs2),
                     (result_type vlmul.vrclass:$vd),
-                    (XLenVT GPR:$vl))),
+                    (XLenVT (VLOp GPR:$vl)))),
                    (!cast<Instruction>(inst # "_WD_" # vlmul.MX # "_" # emul.MX)
                     $rs1, $vs2, $vd,
-                    (NoX0 GPR:$vl), sew)>;
+                    GPR:$vl, sew)>;
 
 class VPatAMOWDMask<string intrinsic_name,
                     string inst,
@@ -2138,10 +2144,10 @@ class VPatAMOWDMask<string intrinsic_name,
                     (op1_type op1_reg_class:$vs2),
                     (result_type vlmul.vrclass:$vd),
                     (mask_type V0),
-                    (XLenVT GPR:$vl))),
+                    (XLenVT (VLOp GPR:$vl)))),
                    (!cast<Instruction>(inst # "_WD_" # vlmul.MX # "_" # emul.MX # "_MASK")
                     $rs1, $vs2, $vd,
-                    (mask_type V0), (NoX0 GPR:$vl), sew)>;
+                    (mask_type V0), GPR:$vl, sew)>;
 
 multiclass VPatUSLoad<string intrinsic,
                       string inst,
@@ -2153,14 +2159,14 @@ multiclass VPatUSLoad<string intrinsic,
 {
     defvar Intr = !cast<Intrinsic>(intrinsic);
     defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
-    def : Pat<(type (Intr GPR:$rs1, GPR:$vl)),
-                    (Pseudo $rs1, (NoX0 GPR:$vl), sew)>;
+    def : Pat<(type (Intr GPR:$rs1, (XLenVT (VLOp GPR:$vl)))),
+                    (Pseudo $rs1, GPR:$vl, sew)>;
     defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
     defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
     def : Pat<(type (IntrMask (type GetVRegNoV0<reg_class>.R:$merge),
-                               GPR:$rs1, (mask_type V0), GPR:$vl)),
+                               GPR:$rs1, (mask_type V0), (XLenVT (VLOp GPR:$vl)))),
                     (PseudoMask $merge,
-                                $rs1, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+                                $rs1, (mask_type V0), GPR:$vl, sew)>;
 }
 
 multiclass VPatUSLoadFF<string inst,
@@ -2171,13 +2177,13 @@ multiclass VPatUSLoadFF<string inst,
                         VReg reg_class>
 {
     defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
-    def : Pat<(type (riscv_vleff GPR:$rs1, GPR:$vl)),
-                    (Pseudo $rs1, (NoX0 GPR:$vl), sew)>;
+    def : Pat<(type (riscv_vleff GPR:$rs1, (XLenVT (VLOp GPR:$vl)))),
+                    (Pseudo $rs1, GPR:$vl, sew)>;
     defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
     def : Pat<(type (riscv_vleff_mask (type GetVRegNoV0<reg_class>.R:$merge),
-                                      GPR:$rs1, (mask_type V0), GPR:$vl)),
+                                      GPR:$rs1, (mask_type V0), (XLenVT (VLOp GPR:$vl)))),
                     (PseudoMask $merge,
-                                $rs1, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+                                $rs1, (mask_type V0), GPR:$vl, sew)>;
 }
 
 multiclass VPatSLoad<string intrinsic,
@@ -2190,14 +2196,14 @@ multiclass VPatSLoad<string intrinsic,
 {
     defvar Intr = !cast<Intrinsic>(intrinsic);
     defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
-    def : Pat<(type (Intr GPR:$rs1, GPR:$rs2, GPR:$vl)),
-                    (Pseudo $rs1, $rs2, (NoX0 GPR:$vl), sew)>;
+    def : Pat<(type (Intr GPR:$rs1, GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
+                    (Pseudo $rs1, $rs2, GPR:$vl, sew)>;
     defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
     defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
     def : Pat<(type (IntrMask (type GetVRegNoV0<reg_class>.R:$merge),
-                               GPR:$rs1, GPR:$rs2, (mask_type V0), GPR:$vl)),
+                               GPR:$rs1, GPR:$rs2, (mask_type V0), (XLenVT (VLOp GPR:$vl)))),
                     (PseudoMask $merge,
-                                $rs1, $rs2, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+                                $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>;
 }
 
 multiclass VPatILoad<string intrinsic,
@@ -2213,16 +2219,16 @@ multiclass VPatILoad<string intrinsic,
 {
     defvar Intr = !cast<Intrinsic>(intrinsic);
     defvar Pseudo = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX);
-    def : Pat<(type (Intr GPR:$rs1, (idx_type idx_reg_class:$rs2), GPR:$vl)),
-                    (Pseudo $rs1, $rs2, (NoX0 GPR:$vl), sew)>;
+    def : Pat<(type (Intr GPR:$rs1, (idx_type idx_reg_class:$rs2), (XLenVT (VLOp GPR:$vl)))),
+                    (Pseudo $rs1, $rs2, GPR:$vl, sew)>;
 
     defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
     defvar PseudoMask = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX#"_MASK");
     def : Pat<(type (IntrMask (type GetVRegNoV0<reg_class>.R:$merge),
                                GPR:$rs1, (idx_type idx_reg_class:$rs2),
-                               (mask_type V0), GPR:$vl)),
+                               (mask_type V0), (XLenVT (VLOp GPR:$vl)))),
                     (PseudoMask $merge,
-                                $rs1, $rs2, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+                                $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>;
 }
 
 multiclass VPatUSStore<string intrinsic,
@@ -2235,12 +2241,12 @@ multiclass VPatUSStore<string intrinsic,
 {
     defvar Intr = !cast<Intrinsic>(intrinsic);
     defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
-    def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, GPR:$vl),
-                    (Pseudo $rs3, $rs1, (NoX0 GPR:$vl), sew)>;
+    def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, (XLenVT (VLOp GPR:$vl))),
+                    (Pseudo $rs3, $rs1, GPR:$vl, sew)>;
     defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
     defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
-    def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, (mask_type V0), GPR:$vl),
-              (PseudoMask $rs3, $rs1, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+    def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, (mask_type V0), (XLenVT (VLOp GPR:$vl))),
+              (PseudoMask $rs3, $rs1, (mask_type V0), GPR:$vl, sew)>;
 }
 
 multiclass VPatSStore<string intrinsic,
@@ -2253,12 +2259,12 @@ multiclass VPatSStore<string intrinsic,
 {
     defvar Intr = !cast<Intrinsic>(intrinsic);
     defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
-    def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, GPR:$rs2, GPR:$vl),
-                    (Pseudo $rs3, $rs1, $rs2, (NoX0 GPR:$vl), sew)>;
+    def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, GPR:$rs2, (XLenVT (VLOp GPR:$vl))),
+                    (Pseudo $rs3, $rs1, $rs2, GPR:$vl, sew)>;
     defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
     defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
-    def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, GPR:$rs2, (mask_type V0), GPR:$vl),
-              (PseudoMask $rs3, $rs1, $rs2, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+    def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, GPR:$rs2, (mask_type V0), (XLenVT (VLOp GPR:$vl))),
+              (PseudoMask $rs3, $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>;
 }
 
 multiclass VPatIStore<string intrinsic,
@@ -2275,13 +2281,13 @@ multiclass VPatIStore<string intrinsic,
     defvar Intr = !cast<Intrinsic>(intrinsic);
     defvar Pseudo = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX);
     def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1,
-                    (idx_type idx_reg_class:$rs2), GPR:$vl),
-              (Pseudo $rs3, $rs1, $rs2, (NoX0 GPR:$vl), sew)>;
+                    (idx_type idx_reg_class:$rs2), (XLenVT (VLOp GPR:$vl))),
+              (Pseudo $rs3, $rs1, $rs2, GPR:$vl, sew)>;
     defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
     defvar PseudoMask = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX#"_MASK");
     def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1,
-                        (idx_type idx_reg_class:$rs2), (mask_type V0), GPR:$vl),
-              (PseudoMask $rs3, $rs1, $rs2, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+                        (idx_type idx_reg_class:$rs2), (mask_type V0), (XLenVT (VLOp GPR:$vl))),
+              (PseudoMask $rs3, $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>;
 }
 
 multiclass VPatUnaryS_M<string intrinsic_name,
@@ -2289,13 +2295,13 @@ multiclass VPatUnaryS_M<string intrinsic_name,
 {
   foreach mti = AllMasks in {
     def : Pat<(XLenVT (!cast<Intrinsic>(intrinsic_name)
-                      (mti.Mask VR:$rs1), GPR:$vl)),
+                      (mti.Mask VR:$rs1), (XLenVT (VLOp GPR:$vl)))),
                       (!cast<Instruction>(inst#"_M_"#mti.BX) $rs1,
-                      (NoX0 GPR:$vl), mti.SEW)>;
+                      GPR:$vl, mti.SEW)>;
     def : Pat<(XLenVT (!cast<Intrinsic>(intrinsic_name # "_mask")
-                      (mti.Mask VR:$rs1), (mti.Mask V0), GPR:$vl)),
+                      (mti.Mask VR:$rs1), (mti.Mask V0), (XLenVT (VLOp GPR:$vl)))),
                       (!cast<Instruction>(inst#"_M_"#mti.BX#"_MASK") $rs1,
-                      (mti.Mask V0), (NoX0 GPR:$vl), mti.SEW)>;
+                      (mti.Mask V0), GPR:$vl, mti.SEW)>;
   }
 }
 
@@ -2360,24 +2366,24 @@ multiclass VPatNullaryV<string intrinsic, string instruction>
 {
   foreach vti = AllIntegerVectors in {
     def : Pat<(vti.Vector (!cast<Intrinsic>(intrinsic)
-                          (XLenVT GPR:$vl))),
+                          (XLenVT (VLOp GPR:$vl)))),
                           (!cast<Instruction>(instruction#"_V_" # vti.LMul.MX)
-                          (NoX0 GPR:$vl), vti.SEW)>;
+                          GPR:$vl, vti.SEW)>;
     def : Pat<(vti.Vector (!cast<Intrinsic>(intrinsic # "_mask")
                           (vti.Vector vti.RegClass:$merge),
-                          (vti.Mask V0), (XLenVT GPR:$vl))),
+                          (vti.Mask V0), (XLenVT (VLOp GPR:$vl)))),
                           (!cast<Instruction>(instruction#"_V_" # vti.LMul.MX # "_MASK")
                           vti.RegClass:$merge, (vti.Mask V0),
-                          (NoX0 GPR:$vl), vti.SEW)>;
+                          GPR:$vl, vti.SEW)>;
   }
 }
 
 multiclass VPatNullaryM<string intrinsic, string inst> {
   foreach mti = AllMasks in
     def : Pat<(mti.Mask (!cast<Intrinsic>(intrinsic)
-                        (XLenVT GPR:$vl))),
+                        (XLenVT (VLOp GPR:$vl)))),
                         (!cast<Instruction>(inst#"_M_"#mti.BX)
-                        (NoX0 GPR:$vl), mti.SEW)>;
+                        GPR:$vl, mti.SEW)>;
 }
 
 multiclass VPatBinary<string intrinsic,
@@ -2414,11 +2420,11 @@ multiclass VPatBinaryCarryIn<string intrinsic,
                          (op1_type op1_reg_class:$rs1),
                          (op2_type op2_kind:$rs2),
                          (mask_type V0),
-                         (XLenVT GPR:$vl))),
+                         (XLenVT (VLOp GPR:$vl)))),
                          (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
                          (op1_type op1_reg_class:$rs1),
                          (op2_type op2_kind:$rs2),
-                         (mask_type V0), (NoX0 GPR:$vl), sew)>;
+                         (mask_type V0), GPR:$vl, sew)>;
 }
 
 multiclass VPatBinaryMaskOut<string intrinsic,
@@ -2435,11 +2441,11 @@ multiclass VPatBinaryMaskOut<string intrinsic,
   def : Pat<(result_type (!cast<Intrinsic>(intrinsic)
                          (op1_type op1_reg_class:$rs1),
                          (op2_type op2_kind:$rs2),
-                         (XLenVT GPR:$vl))),
+                         (XLenVT (VLOp GPR:$vl)))),
                          (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
                          (op1_type op1_reg_class:$rs1),
                          (op2_type op2_kind:$rs2),
-                         (NoX0 GPR:$vl), sew)>;
+                         GPR:$vl, sew)>;
 }
 
 multiclass VPatConversion<string intrinsic,
@@ -3125,7 +3131,7 @@ def PseudoReadVL : Pseudo<(outs GPR:$rd), (ins),
 // Pseudos.
 let hasSideEffects = 1, mayLoad = 0, mayStore = 0, Defs = [VL, VTYPE] in {
 def PseudoVSETVLI : Pseudo<(outs GPR:$rd), (ins GPR:$rs1, VTypeIOp:$vtypei), []>;
-
+def PseudoVSETIVLI : Pseudo<(outs GPR:$rd), (ins uimm5:$rs1, VTypeIOp:$vtypei), []>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -3142,6 +3148,9 @@ foreach eew = EEWList in {
   defm PseudoVSE # eew : VPseudoUSStore;
 }
 
+defm PseudoVLE1 : VPseudoLoadMask;
+defm PseudoVSE1 : VPseudoStoreMask;
+
 //===----------------------------------------------------------------------===//
 // 7.5 Vector Strided Instructions
 //===----------------------------------------------------------------------===//
@@ -3437,12 +3446,12 @@ defm PseudoVFSQRT      : VPseudoUnaryV_V;
 //===----------------------------------------------------------------------===//
 // 14.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
 //===----------------------------------------------------------------------===//
-defm PseudoVFRSQRTE7   : VPseudoUnaryV_V;
+defm PseudoVFRSQRT7    : VPseudoUnaryV_V;
 
 //===----------------------------------------------------------------------===//
 // 14.10. Vector Floating-Point Reciprocal Estimate Instruction
 //===----------------------------------------------------------------------===//
-defm PseudoVFRECE7     : VPseudoUnaryV_V;
+defm PseudoVFREC7      : VPseudoUnaryV_V;
 
 //===----------------------------------------------------------------------===//
 // 14.11. Vector Floating-Point Min/Max Instructions
@@ -3719,6 +3728,15 @@ foreach vti = AllVectors in
                      vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;
 }
 
+foreach vti = AllMasks in {
+  defvar PseudoVLE1 = !cast<Instruction>("PseudoVLE1_V_"#vti.BX);
+  def : Pat<(vti.Mask (int_riscv_vle1 GPR:$rs1, (XLenVT (VLOp GPR:$vl)))),
+            (PseudoVLE1 $rs1, GPR:$vl, vti.SEW)>;
+  defvar PseudoVSE1 = !cast<Instruction>("PseudoVSE1_V_"#vti.BX);
+  def : Pat<(int_riscv_vse1 (vti.Mask VR:$rs3), GPR:$rs1, (XLenVT (VLOp GPR:$vl))),
+            (PseudoVSE1 $rs3, $rs1, GPR:$vl, vti.SEW)>;
+}
+
 //===----------------------------------------------------------------------===//
 // 7.5 Vector Strided Instructions
 //===----------------------------------------------------------------------===//
@@ -3886,62 +3904,63 @@ defm "" : VPatBinaryM_VX_VI<"int_riscv_vmsgt", "PseudoVMSGT", AllIntegerVectors>
 // instruction.
 foreach vti = AllIntegerVectors in {
   def : Pat<(vti.Mask (int_riscv_vmslt (vti.Vector vti.RegClass:$rs1),
-                                       (vti.Scalar simm5_plus1:$rs2), GPR:$vl)),
+                                       (vti.Scalar simm5_plus1:$rs2), (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMSLE_VI_"#vti.LMul.MX) vti.RegClass:$rs1,
                                                                (DecImm simm5_plus1:$rs2),
-                                                               (NoX0 GPR:$vl),
+                                                               GPR:$vl,
                                                                vti.SEW)>;
   def : Pat<(vti.Mask (int_riscv_vmslt_mask (vti.Mask V0),
                                             (vti.Vector vti.RegClass:$rs1),
                                             (vti.Scalar simm5_plus1:$rs2),
                                             (vti.Mask VR:$merge),
-                                            GPR:$vl)),
+                                            (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMSLE_VI_"#vti.LMul.MX#"_MASK")
                                                       VR:$merge,
                                                       vti.RegClass:$rs1,
                                                       (DecImm simm5_plus1:$rs2),
                                                       (vti.Mask V0),
-                                                      (NoX0 GPR:$vl),
+                                                      GPR:$vl,
                                                       vti.SEW)>;
 
-  def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1),
-                                        (vti.Scalar simm5_plus1:$rs2), GPR:$vl)),
+ def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1),
+                                        (vti.Scalar simm5_plus1:$rs2),
+                                        (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX) vti.RegClass:$rs1,
                                                                 (DecImm simm5_plus1:$rs2),
-                                                                (NoX0 GPR:$vl),
+                                                                GPR:$vl,
                                                                 vti.SEW)>;
   def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask V0),
                                              (vti.Vector vti.RegClass:$rs1),
                                              (vti.Scalar simm5_plus1:$rs2),
                                              (vti.Mask VR:$merge),
-                                             GPR:$vl)),
+                                             (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX#"_MASK")
                                                       VR:$merge,
                                                       vti.RegClass:$rs1,
                                                       (DecImm simm5_plus1:$rs2),
                                                       (vti.Mask V0),
-                                                      (NoX0 GPR:$vl),
+                                                      GPR:$vl,
                                                       vti.SEW)>;
 
   // Special cases to avoid matching vmsltu.vi 0 (always false) to
   // vmsleu.vi -1 (always true). Instead match to vmsne.vv.
   def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1),
-                                        (vti.Scalar 0), GPR:$vl)),
+                                        (vti.Scalar 0), (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMSNE_VV_"#vti.LMul.MX) vti.RegClass:$rs1,
                                                                vti.RegClass:$rs1,
-                                                               (NoX0 GPR:$vl),
+                                                               GPR:$vl,
                                                                vti.SEW)>;
   def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask V0),
                                             (vti.Vector vti.RegClass:$rs1),
                                             (vti.Scalar 0),
                                             (vti.Mask VR:$merge),
-                                            GPR:$vl)),
+                                            (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMSNE_VV_"#vti.LMul.MX#"_MASK")
                                                      VR:$merge,
                                                      vti.RegClass:$rs1,
                                                      vti.RegClass:$rs1,
                                                      (vti.Mask V0),
-                                                     (NoX0 GPR:$vl),
+                                                     GPR:$vl,
                                                      vti.SEW)>;
 }
 
@@ -4002,18 +4021,18 @@ defm "" : VPatBinaryV_VM_XM_IM<"int_riscv_vmerge", "PseudoVMERGE">;
 //===----------------------------------------------------------------------===//
 foreach vti = AllVectors in {
   def : Pat<(vti.Vector (int_riscv_vmv_v_v (vti.Vector vti.RegClass:$rs1),
-                                           GPR:$vl)),
+                                           (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMV_V_V_"#vti.LMul.MX)
-             $rs1, (NoX0 GPR:$vl), vti.SEW)>;
+             $rs1, GPR:$vl, vti.SEW)>;
 }
 
 foreach vti = AllIntegerVectors in {
-  def : Pat<(vti.Vector (int_riscv_vmv_v_x GPR:$rs2, GPR:$vl)),
+  def : Pat<(vti.Vector (int_riscv_vmv_v_x GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMV_V_X_"#vti.LMul.MX)
-             $rs2, (NoX0 GPR:$vl), vti.SEW)>;
-  def : Pat<(vti.Vector (int_riscv_vmv_v_x simm5:$imm5, GPR:$vl)),
+             $rs2, GPR:$vl, vti.SEW)>;
+  def : Pat<(vti.Vector (int_riscv_vmv_v_x simm5:$imm5, (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMV_V_I_"#vti.LMul.MX)
-             simm5:$imm5, (NoX0 GPR:$vl), vti.SEW)>;
+             simm5:$imm5, GPR:$vl, vti.SEW)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -4109,12 +4128,12 @@ defm "" : VPatUnaryV_V<"int_riscv_vfsqrt", "PseudoVFSQRT", AllFloatVectors>;
 //===----------------------------------------------------------------------===//
 // 14.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
 //===----------------------------------------------------------------------===//
-defm "" : VPatUnaryV_V<"int_riscv_vfrsqrte7", "PseudoVFRSQRTE7", AllFloatVectors>;
+defm "" : VPatUnaryV_V<"int_riscv_vfrsqrt7", "PseudoVFRSQRT7", AllFloatVectors>;
 
 //===----------------------------------------------------------------------===//
 // 14.10. Vector Floating-Point Reciprocal Estimate Instruction
 //===----------------------------------------------------------------------===//
-defm "" : VPatUnaryV_V<"int_riscv_vfrece7", "PseudoVFRECE7", AllFloatVectors>;
+defm "" : VPatUnaryV_V<"int_riscv_vfrec7", "PseudoVFREC7", AllFloatVectors>;
 
 //===----------------------------------------------------------------------===//
 // 14.11. Vector Floating-Point Min/Max Instructions
@@ -4157,8 +4176,8 @@ foreach fvti = AllFloatVectors in {
   defvar instr = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX);
   def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$rs2),
                                             (fvti.Scalar (fpimm0)),
-                                            (fvti.Mask V0), (XLenVT GPR:$vl))),
-            (instr fvti.RegClass:$rs2, 0, (fvti.Mask V0), (NoX0 GPR:$vl), fvti.SEW)>;
+                                            (fvti.Mask V0), (XLenVT (VLOp GPR:$vl)))),
+            (instr fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.SEW)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -4167,16 +4186,16 @@ foreach fvti = AllFloatVectors in {
 foreach fvti = AllFloatVectors in {
   // If we're splatting fpimm0, use vmv.v.x vd, x0.
   def : Pat<(fvti.Vector (int_riscv_vfmv_v_f
-                         (fvti.Scalar (fpimm0)), GPR:$vl)),
+                         (fvti.Scalar (fpimm0)), (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
-             0, (NoX0 GPR:$vl), fvti.SEW)>;
+             0, GPR:$vl, fvti.SEW)>;
 
   def : Pat<(fvti.Vector (int_riscv_vfmv_v_f
-                         (fvti.Scalar fvti.ScalarRegClass:$rs2), GPR:$vl)),
+                         (fvti.Scalar fvti.ScalarRegClass:$rs2), (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVFMV_V_" # fvti.ScalarSuffix # "_" #
                                 fvti.LMul.MX)
              (fvti.Scalar fvti.ScalarRegClass:$rs2),
-             (NoX0 GPR:$vl), fvti.SEW)>;
+             GPR:$vl, fvti.SEW)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -4321,9 +4340,9 @@ foreach vti = AllIntegerVectors in {
   def : Pat<(riscv_vmv_x_s (vti.Vector vti.RegClass:$rs2)),
             (!cast<Instruction>("PseudoVMV_X_S_" # vti.LMul.MX) $rs2, vti.SEW)>;
   def : Pat<(vti.Vector (int_riscv_vmv_s_x (vti.Vector vti.RegClass:$rs1),
-                                           GPR:$rs2, GPR:$vl)),
+                                           GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMV_S_X_" # vti.LMul.MX)
-             (vti.Vector $rs1), $rs2, (NoX0 GPR:$vl), vti.SEW)>;
+             (vti.Vector $rs1), $rs2, GPR:$vl, vti.SEW)>;
 }
 } // Predicates = [HasStdExtV]
 
@@ -4339,12 +4358,12 @@ foreach fvti = AllFloatVectors in {
                          (instr $rs2, fvti.SEW)>;
 
   def : Pat<(fvti.Vector (int_riscv_vfmv_s_f (fvti.Vector fvti.RegClass:$rs1),
-                         (fvti.Scalar fvti.ScalarRegClass:$rs2), GPR:$vl)),
+                         (fvti.Scalar fvti.ScalarRegClass:$rs2), (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVFMV_S_"#fvti.ScalarSuffix#"_" #
                                 fvti.LMul.MX)
              (fvti.Vector $rs1),
              (fvti.Scalar fvti.ScalarRegClass:$rs2),
-             (NoX0 GPR:$vl), fvti.SEW)>;
+             GPR:$vl, fvti.SEW)>;
 }
 } // Predicates = [HasStdExtV, HasStdExtF]
 
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index aea3d0e17ccc..dee67708bed1 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -8,7 +8,7 @@
 ///
 /// This file contains the required infrastructure and SDNode patterns to
 /// support code generation for the standard 'V' (Vector) extension, version
-/// 0.9.  This version is still experimental as the 'V' extension hasn't been
+/// 0.10.  This version is still experimental as the 'V' extension hasn't been
 /// ratified yet.
 ///
 /// This file is included from and depends upon RISCVInstrInfoVPseudos.td
@@ -384,8 +384,8 @@ defm "" : VPatBinarySDNode_VV_VX<mulhs, "PseudoVMULH">;
 defm "" : VPatBinarySDNode_VV_VX<mulhu, "PseudoVMULHU">;
 
 // 12.11. Vector Integer Divide Instructions
-defm "" : VPatBinarySDNode_VV_VX<sdiv, "PseudoVDIVU">;
-defm "" : VPatBinarySDNode_VV_VX<udiv, "PseudoVDIV">;
+defm "" : VPatBinarySDNode_VV_VX<udiv, "PseudoVDIVU">;
+defm "" : VPatBinarySDNode_VV_VX<sdiv, "PseudoVDIV">;
 defm "" : VPatBinarySDNode_VV_VX<urem, "PseudoVREMU">;
 defm "" : VPatBinarySDNode_VV_VX<srem, "PseudoVREM">;
 
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VE.h b/contrib/llvm-project/llvm/lib/Target/VE/VE.h
index a404f7ced70a..8c1fa840f19c 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VE.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VE.h
@@ -334,7 +334,7 @@ inline static bool isMImmVal(uint64_t Val) {
     return true;
   }
   // (m)1 patterns
-  return (Val & (1UL << 63)) && isShiftedMask_64(Val);
+  return (Val & (UINT64_C(1) << 63)) && isShiftedMask_64(Val);
 }
 
 inline static bool isMImm32Val(uint32_t Val) {
@@ -347,14 +347,14 @@ inline static bool isMImm32Val(uint32_t Val) {
     return true;
   }
   // (m)1 patterns
-  return (Val & (1 << 31)) && isShiftedMask_32(Val);
+  return (Val & (UINT32_C(1) << 31)) && isShiftedMask_32(Val);
 }
 
 /// val2MImm - Convert an integer immediate value to target MImm immediate.
 inline static uint64_t val2MImm(uint64_t Val) {
   if (Val == 0)
     return 0; // (0)1
-  if (Val & (1UL << 63))
+  if (Val & (UINT64_C(1) << 63))
     return countLeadingOnes(Val);       // (m)1
   return countLeadingZeros(Val) | 0x40; // (m)0
 }
@@ -364,8 +364,8 @@ inline static uint64_t mimm2Val(uint64_t Val) {
   if (Val == 0)
     return 0; // (0)1
   if ((Val & 0x40) == 0)
-    return (uint64_t)((1L << 63) >> (Val & 0x3f)); // (m)1
-  return ((uint64_t)(-1L) >> (Val & 0x3f));        // (m)0
+    return (uint64_t)((INT64_C(1) << 63) >> (Val & 0x3f)); // (m)1
+  return ((uint64_t)INT64_C(-1) >> (Val & 0x3f));          // (m)0
 }
 
 inline unsigned M0(unsigned Val) { return Val + 64; }
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 05e482a6b66e..4e6d8e8e1a54 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -492,6 +492,7 @@ static int readPrefixes(struct InternalInstruction *insn) {
       insn->addressSize = (insn->hasAdSize ? 4 : 8);
       insn->displacementSize = 4;
       insn->immediateSize = 4;
+      insn->hasOpSize = false;
     } else {
       insn->registerSize = (insn->hasOpSize ? 2 : 4);
       insn->addressSize = (insn->hasAdSize ? 4 : 8);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp
index caf158102230..a1a16a19f5e5 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp
@@ -284,6 +284,14 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
       return false;
   }
 
+  // Make sure no potentially eflags clobbering phi moves can be inserted in
+  // between.
+  auto HasPhis = [](const BasicBlock *Succ) {
+    return !llvm::empty(Succ->phis());
+  };
+  if (I->isTerminator() && llvm::any_of(successors(I), HasPhis))
+    return false;
+
   CC = TmpCC;
   return true;
 }
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0dd20235aa3c..6b816c710f98 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36916,11 +36916,18 @@ static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V,
     Res = DAG.getNode(SrcOpc0, DL, SrcVT0, DAG.getBitcast(SrcVT0, Res));
     return DAG.getBitcast(VT, Res);
   }
+  case X86ISD::VPERMILPI:
+    // TODO: Handle v4f64 permutes with different low/high lane masks.
+    if (SrcVT0 == MVT::v4f64) {
+      uint64_t Mask = Src0.getConstantOperandVal(1);
+      if ((Mask & 0x3) != ((Mask >> 2) & 0x3))
+        break;
+    }
+    LLVM_FALLTHROUGH;
   case X86ISD::VSHLI:
   case X86ISD::VSRLI:
   case X86ISD::VSRAI:
   case X86ISD::PSHUFD:
-  case X86ISD::VPERMILPI:
     if (Src1.isUndef() || Src0.getOperand(1) == Src1.getOperand(1)) {
       SDValue LHS = DAG.getBitcast(VT, Src0.getOperand(0));
       SDValue RHS =
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
index 0c2b278fdd7b..19012797ae9a 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1123,10 +1123,10 @@ defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
 
 // vextractps - extract 32 bits from XMM
-def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
+def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
       (ins VR128X:$src1, u8imm:$src2),
       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
+      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
       EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
 
 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
index 7cf555748c46..a185a2007b72 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3778,7 +3778,7 @@ let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
                              VEX_4V, VEX_WIG;
   defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128,
                              i128mem, SchedWriteShuffle.XMM, load, 0>,
-                             VEX_4V;
+                             VEX_4V, VEX_WIG;
 }
 
 let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
@@ -3794,7 +3794,7 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
                               VEX_4V, VEX_L, VEX_WIG;
   defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256,
                               i256mem, SchedWriteShuffle.YMM, load, 0>,
-                              VEX_4V, VEX_L;
+                              VEX_4V, VEX_L, VEX_WIG;
 }
 
 let Constraints = "$src1 = $dst" in {
@@ -4756,7 +4756,7 @@ let isCommutable = 0 in {
                                   SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
   defm VPHSUBD    : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
                                   load, i128mem,
-                                  SchedWritePHAdd.XMM, 0>, VEX_4V;
+                                  SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
   defm VPSIGNB    : SS3I_binop_rm_int<0x08, "vpsignb",
                                       int_x86_ssse3_psign_b_128,
                                       SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
@@ -4802,7 +4802,7 @@ let isCommutable = 0 in {
                                   SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
   defm VPHSUBDY   : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
                                   load, i256mem,
-                                  SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L;
+                                  SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
   defm VPSIGNB   : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
                                        SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
   defm VPSIGNW   : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
@@ -6503,7 +6503,7 @@ multiclass pcmpistrm_SS42AI<string asm> {
 
 let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in {
   let Predicates = [HasAVX] in
-  defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX;
+  defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX, VEX_WIG;
   defm PCMPISTRM  : pcmpistrm_SS42AI<"pcmpistrm"> ;
 }
 
@@ -6521,7 +6521,7 @@ multiclass SS42AI_pcmpestrm<string asm> {
 
 let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
   let Predicates = [HasAVX] in
-  defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX;
+  defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX, VEX_WIG;
   defm PCMPESTRM :  SS42AI_pcmpestrm<"pcmpestrm">;
 }
 
@@ -6539,7 +6539,7 @@ multiclass SS42AI_pcmpistri<string asm> {
 
 let Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
   let Predicates = [HasAVX] in
-  defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
+  defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX, VEX_WIG;
   defm PCMPISTRI  : SS42AI_pcmpistri<"pcmpistri">;
 }
 
@@ -6557,7 +6557,7 @@ multiclass SS42AI_pcmpestri<string asm> {
 
 let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
   let Predicates = [HasAVX] in
-  defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
+  defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX, VEX_WIG;
   defm PCMPESTRI  : SS42AI_pcmpestri<"pcmpestri">;
 }
 
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 30a1f81ad0e1..6730824e860a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -149,6 +149,13 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
       if (isNoModRef(MRI))
         continue;
 
+      // A pseudo probe call shouldn't change any function attribute since it
+      // doesn't translate to a real instruction. It comes with a memory access
+      // tag to prevent itself being removed by optimizations and not block
+      // other instructions being optimized.
+      if (isa<PseudoProbeInst>(I))
+        continue;
+
       if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) {
         // The call could access any memory. If that includes writes, note it.
         if (isModSet(MRI))
@@ -1445,8 +1452,7 @@ static bool functionWillReturn(const Function &F) {
   // If there are no loops, then the function is willreturn if all calls in
   // it are willreturn.
   return all_of(instructions(F), [](const Instruction &I) {
-    const auto *CB = dyn_cast<CallBase>(&I);
-    return !CB || CB->hasFnAttr(Attribute::WillReturn);
+    return I.willReturn();
   });
 }
 
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index 37fc27e91100..158fa0771c3b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -30,7 +30,7 @@ namespace llvm {
 ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite,
                                                   StringRef CalleeName) {
   if (CalleeName.empty())
-    return getChildContext(CallSite);
+    return getHottestChildContext(CallSite);
 
   uint32_t Hash = nodeHash(CalleeName, CallSite);
   auto It = AllChildContext.find(Hash);
@@ -40,18 +40,22 @@ ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite,
 }
 
 ContextTrieNode *
-ContextTrieNode::getChildContext(const LineLocation &CallSite) {
+ContextTrieNode::getHottestChildContext(const LineLocation &CallSite) {
   // CSFDO-TODO: This could be slow, change AllChildContext so we can
   // do point look up for child node by call site alone.
-  // CSFDO-TODO: Return the child with max count for indirect call
+  // Retrieve the child node with max count for indirect call
   ContextTrieNode *ChildNodeRet = nullptr;
+  uint64_t MaxCalleeSamples = 0;
   for (auto &It : AllChildContext) {
     ContextTrieNode &ChildNode = It.second;
-    if (ChildNode.CallSiteLoc == CallSite) {
-      if (ChildNodeRet)
-        return nullptr;
-      else
-        ChildNodeRet = &ChildNode;
+    if (ChildNode.CallSiteLoc != CallSite)
+      continue;
+    FunctionSamples *Samples = ChildNode.getFunctionSamples();
+    if (!Samples)
+      continue;
+    if (Samples->getTotalSamples() > MaxCalleeSamples) {
+      ChildNodeRet = &ChildNode;
+      MaxCalleeSamples = Samples->getTotalSamples();
     }
   }
 
@@ -179,7 +183,7 @@ SampleContextTracker::SampleContextTracker(
     SampleContext Context(FuncSample.first(), RawContext);
     LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context << "\n");
     if (!Context.isBaseContext())
-      FuncToCtxtProfileSet[Context.getName()].insert(FSamples);
+      FuncToCtxtProfileSet[Context.getNameWithoutContext()].insert(FSamples);
     ContextTrieNode *NewNode = getOrCreateContextPath(Context, true);
     assert(!NewNode->getFunctionSamples() &&
            "New node can't have sample profile");
@@ -191,12 +195,12 @@ FunctionSamples *
 SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst,
                                                  StringRef CalleeName) {
   LLVM_DEBUG(dbgs() << "Getting callee context for instr: " << Inst << "\n");
-  // CSFDO-TODO: We use CalleeName to differentiate indirect call
-  // We need to get sample for indirect callee too.
   DILocation *DIL = Inst.getDebugLoc();
   if (!DIL)
     return nullptr;
 
+  // For indirect call, CalleeName will be empty, in which case the context
+  // profile for callee with largest total samples will be returned.
   ContextTrieNode *CalleeContext = getCalleeContextFor(DIL, CalleeName);
   if (CalleeContext) {
     FunctionSamples *FSamples = CalleeContext->getFunctionSamples();
@@ -209,6 +213,26 @@ SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst,
   return nullptr;
 }
 
+std::vector<const FunctionSamples *>
+SampleContextTracker::getIndirectCalleeContextSamplesFor(
+    const DILocation *DIL) {
+  std::vector<const FunctionSamples *> R;
+  if (!DIL)
+    return R;
+
+  ContextTrieNode *CallerNode = getContextFor(DIL);
+  LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
+  for (auto &It : CallerNode->getAllChildContext()) {
+    ContextTrieNode &ChildNode = It.second;
+    if (ChildNode.getCallSiteLoc() != CallSite)
+      continue;
+    if (FunctionSamples *CalleeSamples = ChildNode.getFunctionSamples())
+      R.push_back(CalleeSamples);
+  }
+
+  return R;
+}
+
 FunctionSamples *
 SampleContextTracker::getContextSamplesFor(const DILocation *DIL) {
   assert(DIL && "Expect non-null location");
@@ -239,6 +263,17 @@ SampleContextTracker::getContextSamplesFor(const SampleContext &Context) {
   return Node->getFunctionSamples();
 }
 
+SampleContextTracker::ContextSamplesTy &
+SampleContextTracker::getAllContextSamplesFor(const Function &Func) {
+  StringRef CanonName = FunctionSamples::getCanonicalFnName(Func);
+  return FuncToCtxtProfileSet[CanonName];
+}
+
+SampleContextTracker::ContextSamplesTy &
+SampleContextTracker::getAllContextSamplesFor(StringRef Name) {
+  return FuncToCtxtProfileSet[Name];
+}
+
 FunctionSamples *SampleContextTracker::getBaseSamplesFor(const Function &Func,
                                                          bool MergeContext) {
   StringRef CanonName = FunctionSamples::getCanonicalFnName(Func);
@@ -295,11 +330,6 @@ void SampleContextTracker::promoteMergeContextSamplesTree(
     const Instruction &Inst, StringRef CalleeName) {
   LLVM_DEBUG(dbgs() << "Promoting and merging context tree for instr: \n"
                     << Inst << "\n");
-  // CSFDO-TODO: We also need to promote context profile from indirect
-  // calls. We won't have callee names from those from call instr.
-  if (CalleeName.empty())
-    return;
-
   // Get the caller context for the call instruction, we don't use callee
   // name from call because there can be context from indirect calls too.
   DILocation *DIL = Inst.getDebugLoc();
@@ -308,8 +338,23 @@ void SampleContextTracker::promoteMergeContextSamplesTree(
     return;
 
   // Get the context that needs to be promoted
-  LineLocation CallSite(FunctionSamples::getOffset(DIL),
-                        DIL->getBaseDiscriminator());
+  LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
+  // For indirect call, CalleeName will be empty, in which case we need to
+  // promote all non-inlined child context profiles.
+  if (CalleeName.empty()) {
+    for (auto &It : CallerNode->getAllChildContext()) {
+      ContextTrieNode *NodeToPromo = &It.second;
+      if (CallSite != NodeToPromo->getCallSiteLoc())
+        continue;
+      FunctionSamples *FromSamples = NodeToPromo->getFunctionSamples();
+      if (FromSamples && FromSamples->getContext().hasState(InlinedContext))
+        continue;
+      promoteMergeContextSamplesTree(*NodeToPromo);
+    }
+    return;
+  }
+
+  // Get the context for the given callee that needs to be promoted
   ContextTrieNode *NodeToPromo =
       CallerNode->getChildContext(CallSite, CalleeName);
   if (!NodeToPromo)
@@ -329,6 +374,8 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
   LLVM_DEBUG(dbgs() << "  Found context tree root to promote: "
                     << FromSamples->getContext() << "\n");
 
+  assert(!FromSamples->getContext().hasState(InlinedContext) &&
+         "Shouldn't promote inlined context profile");
   StringRef ContextStrToRemove = FromSamples->getContext().getCallingContext();
   return promoteMergeContextSamplesTree(NodeToPromo, RootContext,
                                         ContextStrToRemove);
@@ -361,18 +408,14 @@ SampleContextTracker::getCalleeContextFor(const DILocation *DIL,
                                           StringRef CalleeName) {
   assert(DIL && "Expect non-null location");
 
-  // CSSPGO-TODO: need to support indirect callee
-  if (CalleeName.empty())
-    return nullptr;
-
   ContextTrieNode *CallContext = getContextFor(DIL);
   if (!CallContext)
     return nullptr;
 
+  // When CalleeName is empty, the child context profile with max
+  // total samples will be returned.
   return CallContext->getChildContext(
-      LineLocation(FunctionSamples::getOffset(DIL),
-                   DIL->getBaseDiscriminator()),
-      CalleeName);
+      FunctionSamples::getCallSiteIdentifier(DIL), CalleeName);
 }
 
 ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) {
@@ -386,8 +429,8 @@ ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) {
     if (Name.empty())
       Name = PrevDIL->getScope()->getSubprogram()->getName();
     S.push_back(
-        std::make_pair(LineLocation(FunctionSamples::getOffset(DIL),
-                                    DIL->getBaseDiscriminator()), Name));
+        std::make_pair(FunctionSamples::getCallSiteIdentifier(DIL),
+                       PrevDIL->getScope()->getSubprogram()->getLinkageName()));
     PrevDIL = DIL;
   }
 
@@ -518,4 +561,25 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
   return *ToNode;
 }
 
+// Replace call graph edges with dynamic call edges from the profile.
+void SampleContextTracker::addCallGraphEdges(CallGraph &CG,
+                                             StringMap<Function *> &SymbolMap) {
+  // Add profile call edges to the call graph.
+  std::queue<ContextTrieNode *> NodeQueue;
+  NodeQueue.push(&RootContext);
+  while (!NodeQueue.empty()) {
+    ContextTrieNode *Node = NodeQueue.front();
+    NodeQueue.pop();
+    Function *F = SymbolMap.lookup(Node->getFuncName());
+    for (auto &I : Node->getAllChildContext()) {
+      ContextTrieNode *ChildNode = &I.second;
+      NodeQueue.push(ChildNode);
+      if (F && !F->isDeclaration()) {
+        Function *Callee = SymbolMap.lookup(ChildNode->getFuncName());
+        if (Callee && !Callee->isDeclaration())
+          CG[F]->addCalledFunction(nullptr, CG[Callee]);
+      }
+    }
+  }
+}
 } // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 264ac4065e8c..a6a419bfe742 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -26,6 +26,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/None.h"
+#include "llvm/ADT/PriorityQueue.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
@@ -107,6 +108,16 @@ STATISTIC(NumCSNotInlined,
 STATISTIC(NumMismatchedProfile,
           "Number of functions with CFG mismatched profile");
 STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
+STATISTIC(NumDuplicatedInlinesite,
+          "Number of inlined callsites with a partial distribution factor");
+
+STATISTIC(NumCSInlinedHitMinLimit,
+          "Number of functions with FDO inline stopped due to min size limit");
+STATISTIC(NumCSInlinedHitMaxLimit,
+          "Number of functions with FDO inline stopped due to max size limit");
+STATISTIC(
+    NumCSInlinedHitGrowthLimit,
+    "Number of functions with FDO inline stopped due to growth size limit");
 
 // Command line option to specify the file to read samples from. This is
 // mainly used for debugging.
@@ -166,11 +177,53 @@ static cl::opt<bool> ProfileTopDownLoad(
              "order of call graph during sample profile loading. It only "
              "works for new pass manager. "));
 
+static cl::opt<bool> UseProfileIndirectCallEdges(
+    "use-profile-indirect-call-edges", cl::init(true), cl::Hidden,
+    cl::desc("Considering indirect call samples from profile when top-down "
+             "processing functions. Only CSSPGO is supported."));
+
+static cl::opt<bool> UseProfileTopDownOrder(
+    "use-profile-top-down-order", cl::init(false), cl::Hidden,
+    cl::desc("Process functions in one SCC in a top-down order "
+             "based on the input profile."));
+
 static cl::opt<bool> ProfileSizeInline(
     "sample-profile-inline-size", cl::Hidden, cl::init(false),
     cl::desc("Inline cold call sites in profile loader if it's beneficial "
              "for code size."));
 
+static cl::opt<int> ProfileInlineGrowthLimit(
+    "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
+    cl::desc("The size growth ratio limit for proirity-based sample profile "
+             "loader inlining."));
+
+static cl::opt<int> ProfileInlineLimitMin(
+    "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
+    cl::desc("The lower bound of size growth limit for "
+             "proirity-based sample profile loader inlining."));
+
+static cl::opt<int> ProfileInlineLimitMax(
+    "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
+    cl::desc("The upper bound of size growth limit for "
+             "proirity-based sample profile loader inlining."));
+
+static cl::opt<int> ProfileICPThreshold(
+    "sample-profile-icp-threshold", cl::Hidden, cl::init(5),
+    cl::desc(
+        "Relative hotness threshold for indirect "
+        "call promotion in proirity-based sample profile loader inlining."));
+
+static cl::opt<int> SampleHotCallSiteThreshold(
+    "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
+    cl::desc("Hot callsite threshold for proirity-based sample profile loader "
+             "inlining."));
+
+static cl::opt<bool> CallsitePrioritizedInline(
+    "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore,
+    cl::init(false),
+    cl::desc("Use call site prioritized inlining for sample profile loader."
+             "Currently only CSSPGO is supported."));
+
 static cl::opt<int> SampleColdCallSiteThreshold(
     "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
     cl::desc("Threshold for inlining cold callsites"));
@@ -313,6 +366,38 @@ private:
   DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
 };
 
+// Inline candidate used by iterative callsite prioritized inliner
+struct InlineCandidate {
+  CallBase *CallInstr;
+  const FunctionSamples *CalleeSamples;
+  // Prorated callsite count, which will be used to guide inlining. For example,
+  // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
+  // copies will get their own distribution factors and their prorated counts
+  // will be used to decide if they should be inlined independently.
+  uint64_t CallsiteCount;
+  // Call site distribution factor to prorate the profile samples for a
+  // duplicated callsite. Default value is 1.0.
+  float CallsiteDistribution;
+};
+
+// Inline candidate comparer using call site weight
+struct CandidateComparer {
+  bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) {
+    if (LHS.CallsiteCount != RHS.CallsiteCount)
+      return LHS.CallsiteCount < RHS.CallsiteCount;
+
+    // Tie breaker using GUID so we have stable/deterministic inlining order
+    assert(LHS.CalleeSamples && RHS.CalleeSamples &&
+           "Expect non-null FunctionSamples");
+    return LHS.CalleeSamples->getGUID(LHS.CalleeSamples->getName()) <
+           RHS.CalleeSamples->getGUID(RHS.CalleeSamples->getName());
+  }
+};
+
+using CandidateQueue =
+    PriorityQueue<InlineCandidate, std::vector<InlineCandidate>,
+                  CandidateComparer>;
+
 /// Sample profile pass.
 ///
 /// This pass reads profile data from the file specified by
@@ -350,9 +435,21 @@ protected:
   findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
   mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;
   const FunctionSamples *findFunctionSamples(const Instruction &I) const;
-  bool inlineCallInstruction(CallBase &CB);
+  // Attempt to promote indirect call and also inline the promoted call
+  bool tryPromoteAndInlineCandidate(
+      Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
+      uint64_t &Sum, DenseSet<Instruction *> &PromotedInsns,
+      SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
   bool inlineHotFunctions(Function &F,
                           DenseSet<GlobalValue::GUID> &InlinedGUIDs);
+  InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
+  bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
+  bool
+  tryInlineCandidate(InlineCandidate &Candidate,
+                     SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
+  bool
+  inlineHotFunctionsWithPriority(Function &F,
+                                 DenseSet<GlobalValue::GUID> &InlinedGUIDs);
   // Inline cold/small functions in addition to hot ones
   bool shouldInlineColdCallee(CallBase &CallInst);
   void emitOptimizationRemarksForInlineCandidates(
@@ -371,6 +468,8 @@ protected:
   uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
   void buildEdges(Function &F);
   std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
+  void addCallGraphEdges(CallGraph &CG, const FunctionSamples &Samples);
+  void replaceCallGraphEdges(CallGraph &CG, StringMap<Function *> &SymbolMap);
   bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
   void computeDominanceAndLoopInfo(Function &F);
   void clearFunctionData();
@@ -808,7 +907,7 @@ ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
 
   const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
   if (R) {
-    uint64_t Samples = R.get();
+    uint64_t Samples = R.get() * Probe->Factor;
     bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
     if (FirstMark) {
       ORE->emit([&]() {
@@ -816,13 +915,17 @@ ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
         Remark << "Applied " << ore::NV("NumSamples", Samples);
         Remark << " samples from profile (ProbeId=";
         Remark << ore::NV("ProbeId", Probe->Id);
+        Remark << ", Factor=";
+        Remark << ore::NV("Factor", Probe->Factor);
+        Remark << ", OriginalSamples=";
+        Remark << ore::NV("OriginalSamples", R.get());
         Remark << ")";
         return Remark;
       });
     }
-
     LLVM_DEBUG(dbgs() << "    " << Probe->Id << ":" << Inst
-                      << " - weight: " << R.get() << ")\n");
+                      << " - weight: " << R.get() << " - factor: "
+                      << format("%0.2f", Probe->Factor) << ")\n");
     return Samples;
   }
   return R;
@@ -918,6 +1021,31 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
     return R;
   }
 
+  auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
+    assert(L && R && "Expect non-null FunctionSamples");
+    if (L->getEntrySamples() != R->getEntrySamples())
+      return L->getEntrySamples() > R->getEntrySamples();
+    return FunctionSamples::getGUID(L->getName()) <
+           FunctionSamples::getGUID(R->getName());
+  };
+
+  if (ProfileIsCS) {
+    auto CalleeSamples =
+        ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
+    if (CalleeSamples.empty())
+      return R;
+
+    // For CSSPGO, we only use target context profile's entry count
+    // as that already includes both inlined callee and non-inlined ones..
+    Sum = 0;
+    for (const auto *const FS : CalleeSamples) {
+      Sum += FS->getEntrySamples();
+      R.push_back(FS);
+    }
+    llvm::sort(R, FSCompare);
+    return R;
+  }
+
   const FunctionSamples *FS = findFunctionSamples(Inst);
   if (FS == nullptr)
     return R;
@@ -935,12 +1063,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
       Sum += NameFS.second.getEntrySamples();
       R.push_back(&NameFS.second);
     }
-    llvm::sort(R, [](const FunctionSamples *L, const FunctionSamples *R) {
-      if (L->getEntrySamples() != R->getEntrySamples())
-        return L->getEntrySamples() > R->getEntrySamples();
-      return FunctionSamples::getGUID(L->getName()) <
-             FunctionSamples::getGUID(R->getName());
-    });
+    llvm::sort(R, FSCompare);
   }
   return R;
 }
@@ -977,42 +1100,64 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
   return it.first->second;
 }
 
-bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) {
-  if (ExternalInlineAdvisor) {
-    auto Advice = ExternalInlineAdvisor->getAdvice(CB);
-    if (!Advice->isInliningRecommended()) {
-      Advice->recordUnattemptedInlining();
-      return false;
+/// Attempt to promote indirect call and also inline the promoted call.
+///
+/// \param F  Caller function.
+/// \param Candidate  ICP and inline candidate.
+/// \param Sum  Sum of target counts for indirect call.
+/// \param PromotedInsns  Map to keep track of indirect call already processed.
+/// \param Candidate  ICP and inline candidate.
+/// \param InlinedCallSite  Output vector for new call sites exposed after
+/// inlining.
+bool SampleProfileLoader::tryPromoteAndInlineCandidate(
+    Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
+    DenseSet<Instruction *> &PromotedInsns,
+    SmallVector<CallBase *, 8> *InlinedCallSite) {
+  const char *Reason = "Callee function not available";
+  // R->getValue() != &F is to prevent promoting a recursive call.
+  // If it is a recursive call, we do not inline it as it could bloat
+  // the code exponentially. There is way to better handle this, e.g.
+  // clone the caller first, and inline the cloned caller if it is
+  // recursive. As llvm does not inline recursive calls, we will
+  // simply ignore it instead of handling it explicitly.
+  auto R = SymbolMap.find(Candidate.CalleeSamples->getFuncName());
+  if (R != SymbolMap.end() && R->getValue() &&
+      !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
+      R->getValue()->hasFnAttribute("use-sample-profile") &&
+      R->getValue() != &F &&
+      isLegalToPromote(*Candidate.CallInstr, R->getValue(), &Reason)) {
+    auto *DI =
+        &pgo::promoteIndirectCall(*Candidate.CallInstr, R->getValue(),
+                                  Candidate.CallsiteCount, Sum, false, ORE);
+    if (DI) {
+      Sum -= Candidate.CallsiteCount;
+      // Prorate the indirect callsite distribution.
+      // Do not update the promoted direct callsite distribution at this
+      // point since the original distribution combined with the callee
+      // profile will be used to prorate callsites from the callee if
+      // inlined. Once not inlined, the direct callsite distribution should
+      // be prorated so that the it will reflect the real callsite counts.
+      setProbeDistributionFactor(*Candidate.CallInstr,
+                                 Candidate.CallsiteDistribution * Sum /
+                                     SumOrigin);
+      PromotedInsns.insert(Candidate.CallInstr);
+      Candidate.CallInstr = DI;
+      if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
+        bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
+        if (!Inlined) {
+          // Prorate the direct callsite distribution so that it reflects real
+          // callsite counts.
+          setProbeDistributionFactor(*DI, Candidate.CallsiteDistribution *
+                                              Candidate.CallsiteCount /
+                                              SumOrigin);
+        }
+        return Inlined;
+      }
     }
-    // Dummy record, we don't use it for replay.
-    Advice->recordInlining();
-  }
-
-  Function *CalledFunction = CB.getCalledFunction();
-  assert(CalledFunction);
-  DebugLoc DLoc = CB.getDebugLoc();
-  BasicBlock *BB = CB.getParent();
-  InlineParams Params = getInlineParams();
-  Params.ComputeFullInlineCost = true;
-  // Checks if there is anything in the reachable portion of the callee at
-  // this callsite that makes this inlining potentially illegal. Need to
-  // set ComputeFullInlineCost, otherwise getInlineCost may return early
-  // when cost exceeds threshold without checking all IRs in the callee.
-  // The acutal cost does not matter because we only checks isNever() to
-  // see if it is legal to inline the callsite.
-  InlineCost Cost =
-      getInlineCost(CB, Params, GetTTI(*CalledFunction), GetAC, GetTLI);
-  if (Cost.isNever()) {
-    ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
-              << "incompatible inlining");
-    return false;
-  }
-  InlineFunctionInfo IFI(nullptr, GetAC);
-  if (InlineFunction(CB, IFI).isSuccess()) {
-    // The call to InlineFunction erases I, so we can't pass it here.
-    emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
-                    true, CSINLINE_DEBUG);
-    return true;
+  } else {
+    LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
+                      << Candidate.CalleeSamples->getFuncName() << " because "
+                      << Reason << "\n");
   }
   return false;
 }
@@ -1078,10 +1223,11 @@ bool SampleProfileLoader::inlineHotFunctions(
          "ProfAccForSymsInList should be false when profile-sample-accurate "
          "is enabled");
 
-  DenseMap<CallBase *, const FunctionSamples *> localNotInlinedCallSites;
+  DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
   bool Changed = false;
-  while (true) {
-    bool LocalChanged = false;
+  bool LocalChanged = true;
+  while (LocalChanged) {
+    LocalChanged = false;
     SmallVector<CallBase *, 10> CIS;
     for (auto &BB : F) {
       bool Hot = false;
@@ -1095,7 +1241,7 @@ bool SampleProfileLoader::inlineHotFunctions(
                    "GUIDToFuncNameMap has to be populated");
             AllCandidates.push_back(CB);
             if (FS->getEntrySamples() > 0 || ProfileIsCS)
-              localNotInlinedCallSites.try_emplace(CB, FS);
+              LocalNotInlinedCallSites.try_emplace(CB, FS);
             if (callsiteIsHot(FS, PSI))
               Hot = true;
             else if (shouldInlineColdCallee(*CB))
@@ -1113,6 +1259,11 @@ bool SampleProfileLoader::inlineHotFunctions(
     }
     for (CallBase *I : CIS) {
       Function *CalledFunction = I->getCalledFunction();
+      InlineCandidate Candidate = {
+          I,
+          LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I]
+                                            : nullptr,
+          0 /* dummy count */, 1.0 /* dummy distribution factor */};
       // Do not inline recursive calls.
       if (CalledFunction == &F)
         continue;
@@ -1121,6 +1272,7 @@ bool SampleProfileLoader::inlineHotFunctions(
           continue;
         uint64_t Sum;
         for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
+          uint64_t SumOrigin = Sum;
           if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
             FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
                                      PSI->getOrCompHotCountThreshold());
@@ -1129,65 +1281,34 @@ bool SampleProfileLoader::inlineHotFunctions(
           if (!callsiteIsHot(FS, PSI))
             continue;
 
-          const char *Reason = "Callee function not available";
-          // R->getValue() != &F is to prevent promoting a recursive call.
-          // If it is a recursive call, we do not inline it as it could bloat
-          // the code exponentially. There is way to better handle this, e.g.
-          // clone the caller first, and inline the cloned caller if it is
-          // recursive. As llvm does not inline recursive calls, we will
-          // simply ignore it instead of handling it explicitly.
-          auto CalleeFunctionName = FS->getFuncName();
-          auto R = SymbolMap.find(CalleeFunctionName);
-          if (R != SymbolMap.end() && R->getValue() &&
-              !R->getValue()->isDeclaration() &&
-              R->getValue()->getSubprogram() &&
-              R->getValue()->hasFnAttribute("use-sample-profile") &&
-              R->getValue() != &F &&
-              isLegalToPromote(*I, R->getValue(), &Reason)) {
-            uint64_t C = FS->getEntrySamples();
-            auto &DI =
-                pgo::promoteIndirectCall(*I, R->getValue(), C, Sum, false, ORE);
-            Sum -= C;
-            PromotedInsns.insert(I);
-            // If profile mismatches, we should not attempt to inline DI.
-            if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
-                inlineCallInstruction(cast<CallBase>(DI))) {
-              if (ProfileIsCS)
-                ContextTracker->markContextSamplesInlined(FS);
-              localNotInlinedCallSites.erase(I);
-              LocalChanged = true;
-              ++NumCSInlined;
-            }
-          } else {
-            LLVM_DEBUG(dbgs()
-                       << "\nFailed to promote indirect call to "
-                       << CalleeFunctionName << " because " << Reason << "\n");
+          Candidate = {I, FS, FS->getEntrySamples(), 1.0};
+          if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
+                                           PromotedInsns)) {
+            LocalNotInlinedCallSites.erase(I);
+            LocalChanged = true;
           }
         }
       } else if (CalledFunction && CalledFunction->getSubprogram() &&
                  !CalledFunction->isDeclaration()) {
-        if (inlineCallInstruction(*I)) {
-          if (ProfileIsCS)
-            ContextTracker->markContextSamplesInlined(
-                localNotInlinedCallSites[I]);
-          localNotInlinedCallSites.erase(I);
+        if (tryInlineCandidate(Candidate)) {
+          LocalNotInlinedCallSites.erase(I);
           LocalChanged = true;
-          ++NumCSInlined;
         }
       } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
         findCalleeFunctionSamples(*I)->findInlinedFunctions(
             InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
       }
     }
-    if (LocalChanged) {
-      Changed = true;
-    } else {
-      break;
-    }
+    Changed |= LocalChanged;
   }
 
+  // For CS profile, profile for not inlined context will be merged when
+  // base profile is being trieved
+  if (ProfileIsCS)
+    return Changed;
+
   // Accumulate not inlined callsite information into notInlinedSamples
-  for (const auto &Pair : localNotInlinedCallSites) {
+  for (const auto &Pair : LocalNotInlinedCallSites) {
     CallBase *I = Pair.getFirst();
     Function *Callee = I->getCalledFunction();
     if (!Callee || Callee->isDeclaration())
@@ -1232,6 +1353,266 @@ bool SampleProfileLoader::inlineHotFunctions(
   return Changed;
 }
 
+bool SampleProfileLoader::tryInlineCandidate(
+    InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
+
+  CallBase &CB = *Candidate.CallInstr;
+  Function *CalledFunction = CB.getCalledFunction();
+  assert(CalledFunction && "Expect a callee with definition");
+  DebugLoc DLoc = CB.getDebugLoc();
+  BasicBlock *BB = CB.getParent();
+
+  InlineCost Cost = shouldInlineCandidate(Candidate);
+  if (Cost.isNever()) {
+    ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
+              << "incompatible inlining");
+    return false;
+  }
+
+  if (!Cost)
+    return false;
+
+  InlineFunctionInfo IFI(nullptr, GetAC);
+  if (InlineFunction(CB, IFI).isSuccess()) {
+    // The call to InlineFunction erases I, so we can't pass it here.
+    emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
+                    true, CSINLINE_DEBUG);
+
+    // Now populate the list of newly exposed call sites.
+    if (InlinedCallSites) {
+      InlinedCallSites->clear();
+      for (auto &I : IFI.InlinedCallSites)
+        InlinedCallSites->push_back(I);
+    }
+
+    if (ProfileIsCS)
+      ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
+    ++NumCSInlined;
+
+    // Prorate inlined probes for a duplicated inlining callsite which probably
+    // has a distribution less than 100%. Samples for an inlinee should be
+    // distributed among the copies of the original callsite based on each
+    // callsite's distribution factor for counts accuracy. Note that an inlined
+    // probe may come with its own distribution factor if it has been duplicated
+    // in the inlinee body. The two factor are multiplied to reflect the
+    // aggregation of duplication.
+    if (Candidate.CallsiteDistribution < 1) {
+      for (auto &I : IFI.InlinedCallSites) {
+        if (Optional<PseudoProbe> Probe = extractProbe(*I))
+          setProbeDistributionFactor(*I, Probe->Factor *
+                                             Candidate.CallsiteDistribution);
+      }
+      NumDuplicatedInlinesite++;
+    }
+
+    return true;
+  }
+  return false;
+}
+
+bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
+                                             CallBase *CB) {
+  assert(CB && "Expect non-null call instruction");
+
+  if (isa<IntrinsicInst>(CB))
+    return false;
+
+  // Find the callee's profile. For indirect call, find hottest target profile.
+  const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
+  if (!CalleeSamples)
+    return false;
+
+  float Factor = 1.0;
+  if (Optional<PseudoProbe> Probe = extractProbe(*CB))
+    Factor = Probe->Factor;
+
+  uint64_t CallsiteCount = 0;
+  ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent());
+  if (Weight)
+    CallsiteCount = Weight.get();
+  if (CalleeSamples)
+    CallsiteCount = std::max(
+        CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor));
+
+  *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
+  return true;
+}
+
+InlineCost
+SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
+  std::unique_ptr<InlineAdvice> Advice = nullptr;
+  if (ExternalInlineAdvisor) {
+    Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr);
+    if (!Advice->isInliningRecommended()) {
+      Advice->recordUnattemptedInlining();
+      return InlineCost::getNever("not previously inlined");
+    }
+    Advice->recordInlining();
+    return InlineCost::getAlways("previously inlined");
+  }
+
+  // Adjust threshold based on call site hotness, only do this for callsite
+  // prioritized inliner because otherwise cost-benefit check is done earlier.
+  int SampleThreshold = SampleColdCallSiteThreshold;
+  if (CallsitePrioritizedInline) {
+    if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
+      SampleThreshold = SampleHotCallSiteThreshold;
+    else if (!ProfileSizeInline)
+      return InlineCost::getNever("cold callsite");
+  }
+
+  Function *Callee = Candidate.CallInstr->getCalledFunction();
+  assert(Callee && "Expect a definition for inline candidate of direct call");
+
+  InlineParams Params = getInlineParams();
+  Params.ComputeFullInlineCost = true;
+  // Checks if there is anything in the reachable portion of the callee at
+  // this callsite that makes this inlining potentially illegal. Need to
+  // set ComputeFullInlineCost, otherwise getInlineCost may return early
+  // when cost exceeds threshold without checking all IRs in the callee.
+  // The acutal cost does not matter because we only checks isNever() to
+  // see if it is legal to inline the callsite.
+  InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
+                                  GetTTI(*Callee), GetAC, GetTLI);
+
+  // Honor always inline and never inline from call analyzer
+  if (Cost.isNever() || Cost.isAlways())
+    return Cost;
+
+  // For old FDO inliner, we inline the call site as long as cost is not
+  // "Never". The cost-benefit check is done earlier.
+  if (!CallsitePrioritizedInline) {
+    return InlineCost::get(Cost.getCost(), INT_MAX);
+  }
+
+  // Otherwise only use the cost from call analyzer, but overwite threshold with
+  // Sample PGO threshold.
+  return InlineCost::get(Cost.getCost(), SampleThreshold);
+}
+
+bool SampleProfileLoader::inlineHotFunctionsWithPriority(
+    Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
+  DenseSet<Instruction *> PromotedInsns;
+  assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now");
+
+  // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
+  // Profile symbol list is ignored when profile-sample-accurate is on.
+  assert((!ProfAccForSymsInList ||
+          (!ProfileSampleAccurate &&
+           !F.hasFnAttribute("profile-sample-accurate"))) &&
+         "ProfAccForSymsInList should be false when profile-sample-accurate "
+         "is enabled");
+
+  // Populating worklist with initial call sites from root inliner, along
+  // with call site weights.
+  CandidateQueue CQueue;
+  InlineCandidate NewCandidate;
+  for (auto &BB : F) {
+    for (auto &I : BB.getInstList()) {
+      auto *CB = dyn_cast<CallBase>(&I);
+      if (!CB)
+        continue;
+      if (getInlineCandidate(&NewCandidate, CB))
+        CQueue.push(NewCandidate);
+    }
+  }
+
+  // Cap the size growth from profile guided inlining. This is needed even
+  // though cost of each inline candidate already accounts for callee size,
+  // because with top-down inlining, we can grow inliner size significantly
+  // with large number of smaller inlinees each pass the cost check.
+  assert(ProfileInlineLimitMax >= ProfileInlineLimitMin &&
+         "Max inline size limit should not be smaller than min inline size "
+         "limit.");
+  unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit;
+  SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax);
+  SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin);
+  if (ExternalInlineAdvisor)
+    SizeLimit = std::numeric_limits<unsigned>::max();
+
+  // Perform iterative BFS call site prioritized inlining
+  bool Changed = false;
+  while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
+    InlineCandidate Candidate = CQueue.top();
+    CQueue.pop();
+    CallBase *I = Candidate.CallInstr;
+    Function *CalledFunction = I->getCalledFunction();
+
+    if (CalledFunction == &F)
+      continue;
+    if (I->isIndirectCall()) {
+      if (PromotedInsns.count(I))
+        continue;
+      uint64_t Sum;
+      auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
+      uint64_t SumOrigin = Sum;
+      Sum *= Candidate.CallsiteDistribution;
+      for (const auto *FS : CalleeSamples) {
+        // TODO: Consider disable pre-lTO ICP for MonoLTO as well
+        if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
+          FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
+                                   PSI->getOrCompHotCountThreshold());
+          continue;
+        }
+        uint64_t EntryCountDistributed =
+            FS->getEntrySamples() * Candidate.CallsiteDistribution;
+        // In addition to regular inline cost check, we also need to make sure
+        // ICP isn't introducing excessive speculative checks even if individual
+        // target looks beneficial to promote and inline. That means we should
+        // only do ICP when there's a small number dominant targets.
+        if (EntryCountDistributed < SumOrigin / ProfileICPThreshold)
+          break;
+        // TODO: Fix CallAnalyzer to handle all indirect calls.
+        // For indirect call, we don't run CallAnalyzer to get InlineCost
+        // before actual inlining. This is because we could see two different
+        // types from the same definition, which makes CallAnalyzer choke as
+        // it's expecting matching parameter type on both caller and callee
+        // side. See example from PR18962 for the triggering cases (the bug was
+        // fixed, but we generate different types).
+        if (!PSI->isHotCount(EntryCountDistributed))
+          break;
+        SmallVector<CallBase *, 8> InlinedCallSites;
+        // Attach function profile for promoted indirect callee, and update
+        // call site count for the promoted inline candidate too.
+        Candidate = {I, FS, EntryCountDistributed,
+                     Candidate.CallsiteDistribution};
+        if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
+                                         PromotedInsns, &InlinedCallSites)) {
+          for (auto *CB : InlinedCallSites) {
+            if (getInlineCandidate(&NewCandidate, CB))
+              CQueue.emplace(NewCandidate);
+          }
+          Changed = true;
+        }
+      }
+    } else if (CalledFunction && CalledFunction->getSubprogram() &&
+               !CalledFunction->isDeclaration()) {
+      SmallVector<CallBase *, 8> InlinedCallSites;
+      if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
+        for (auto *CB : InlinedCallSites) {
+          if (getInlineCandidate(&NewCandidate, CB))
+            CQueue.emplace(NewCandidate);
+        }
+        Changed = true;
+      }
+    } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
+      findCalleeFunctionSamples(*I)->findInlinedFunctions(
+          InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
+    }
+  }
+
+  if (!CQueue.empty()) {
+    if (SizeLimit == (unsigned)ProfileInlineLimitMax)
+      ++NumCSInlinedHitMaxLimit;
+    else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
+      ++NumCSInlinedHitMinLimit;
+    else
+      ++NumCSInlinedHitGrowthLimit;
+  }
+
+  return Changed;
+}
+
 /// Find equivalence classes for the given block.
 ///
 /// This finds all the blocks that are guaranteed to execute the same
@@ -1654,6 +2035,14 @@ void SampleProfileLoader::propagateWeights(Function &F) {
           auto T = FS->findCallTargetMapAt(CallSite);
           if (!T || T.get().empty())
             continue;
+          // Prorate the callsite counts to reflect what is already done to the
+          // callsite, such as ICP or calliste cloning.
+          if (FunctionSamples::ProfileIsProbeBased) {
+            if (Optional<PseudoProbe> Probe = extractProbe(I)) {
+              if (Probe->Factor < 1)
+                T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
+            }
+          }
           SmallVector<InstrProfValueData, 2> SortedCallTargets =
               GetSortedValueDataFromCallTargets(T.get());
           uint64_t Sum;
@@ -1833,7 +2222,10 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
   }
 
   DenseSet<GlobalValue::GUID> InlinedGUIDs;
-  Changed |= inlineHotFunctions(F, InlinedGUIDs);
+  if (ProfileIsCS && CallsitePrioritizedInline)
+    Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
+  else
+    Changed |= inlineHotFunctions(F, InlinedGUIDs);
 
   // Compute basic block weights.
   Changed |= computeBlockWeights(F);
@@ -1898,6 +2290,45 @@ INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
 INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
                     "Sample Profile loader", false, false)
 
+// Add inlined profile call edges to the call graph.
+void SampleProfileLoader::addCallGraphEdges(CallGraph &CG,
+                                            const FunctionSamples &Samples) {
+  Function *Caller = SymbolMap.lookup(Samples.getFuncName());
+  if (!Caller || Caller->isDeclaration())
+    return;
+
+  // Skip non-inlined call edges which are not important since top down inlining
+  // for non-CS profile is to get more precise profile matching, not to enable
+  // more inlining.
+
+  for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
+    for (const auto &InlinedSamples : CallsiteSamples.second) {
+      Function *Callee = SymbolMap.lookup(InlinedSamples.first);
+      if (Callee && !Callee->isDeclaration())
+        CG[Caller]->addCalledFunction(nullptr, CG[Callee]);
+      addCallGraphEdges(CG, InlinedSamples.second);
+    }
+  }
+}
+
+// Replace call graph edges with dynamic call edges from the profile.
+void SampleProfileLoader::replaceCallGraphEdges(
+    CallGraph &CG, StringMap<Function *> &SymbolMap) {
+  // Remove static call edges from the call graph except for the ones from the
+  // root which make the call graph connected.
+  for (const auto &Node : CG)
+    if (Node.second.get() != CG.getExternalCallingNode())
+      Node.second->removeAllCalledFunctions();
+
+  // Add profile call edges to the call graph.
+  if (ProfileIsCS) {
+    ContextTracker->addCallGraphEdges(CG, SymbolMap);
+  } else {
+    for (const auto &Samples : Reader->getProfiles())
+      addCallGraphEdges(CG, Samples.second);
+  }
+}
+
 std::vector<Function *>
 SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
   std::vector<Function *> FunctionOrderList;
@@ -1920,16 +2351,97 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
   }
 
   assert(&CG->getModule() == &M);
+
+  // Add indirect call edges from profile to augment the static call graph.
+  // Functions will be processed in a top-down order defined by the static call
+  // graph. Adjusting the order by considering indirect call edges from the
+  // profile (which don't exist in the static call graph) can enable the
+  // inlining of indirect call targets by processing the caller before them.
+  // TODO: enable this for non-CS profile and fix the counts returning logic to
+  // have a full support for indirect calls.
+  if (UseProfileIndirectCallEdges && ProfileIsCS) {
+    for (auto &Entry : *CG) {
+      const auto *F = Entry.first;
+      if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile"))
+        continue;
+      auto &AllContexts = ContextTracker->getAllContextSamplesFor(F->getName());
+      if (AllContexts.empty())
+        continue;
+
+      for (const auto &BB : *F) {
+        for (const auto &I : BB.getInstList()) {
+          const auto *CB = dyn_cast<CallBase>(&I);
+          if (!CB || !CB->isIndirectCall())
+            continue;
+          const DebugLoc &DLoc = I.getDebugLoc();
+          if (!DLoc)
+            continue;
+          auto CallSite = FunctionSamples::getCallSiteIdentifier(DLoc);
+          for (FunctionSamples *Samples : AllContexts) {
+            if (auto CallTargets = Samples->findCallTargetMapAt(CallSite)) {
+              for (const auto &Target : CallTargets.get()) {
+                Function *Callee = SymbolMap.lookup(Target.first());
+                if (Callee && !Callee->isDeclaration())
+                  Entry.second->addCalledFunction(nullptr, (*CG)[Callee]);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // Compute a top-down order the profile which is used to sort functions in
+  // one SCC later. The static processing order computed for an SCC may not
+  // reflect the call contexts in the context-sensitive profile, thus may cause
+  // potential inlining to be overlooked. The function order in one SCC is being
+  // adjusted to a top-down order based on the profile to favor more inlining.
+  DenseMap<Function *, uint64_t> ProfileOrderMap;
+  if (UseProfileTopDownOrder ||
+      (ProfileIsCS && !UseProfileTopDownOrder.getNumOccurrences())) {
+    // Create a static call graph. The call edges are not important since they
+    // will be replaced by dynamic edges from the profile.
+    CallGraph ProfileCG(M);
+    replaceCallGraphEdges(ProfileCG, SymbolMap);
+    scc_iterator<CallGraph *> CGI = scc_begin(&ProfileCG);
+    uint64_t I = 0;
+    while (!CGI.isAtEnd()) {
+      for (CallGraphNode *Node : *CGI) {
+        if (auto *F = Node->getFunction())
+          ProfileOrderMap[F] = ++I;
+      }
+      ++CGI;
+    }
+  }
+
   scc_iterator<CallGraph *> CGI = scc_begin(CG);
   while (!CGI.isAtEnd()) {
-    for (CallGraphNode *node : *CGI) {
-      auto F = node->getFunction();
+    uint64_t Start = FunctionOrderList.size();
+    for (CallGraphNode *Node : *CGI) {
+      auto *F = Node->getFunction();
       if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
         FunctionOrderList.push_back(F);
     }
+
+    // Sort nodes in SCC based on the profile top-down order.
+    if (!ProfileOrderMap.empty()) {
+      std::stable_sort(FunctionOrderList.begin() + Start,
+                       FunctionOrderList.end(),
+                       [&ProfileOrderMap](Function *Left, Function *Right) {
+                         return ProfileOrderMap[Left] < ProfileOrderMap[Right];
+                       });
+    }
+
     ++CGI;
   }
 
+  LLVM_DEBUG({
+    dbgs() << "Function processing order:\n";
+    for (auto F : reverse(FunctionOrderList)) {
+      dbgs() << F->getName() << "\n";
+    }
+  });
+
   std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
   return FunctionOrderList;
 }
@@ -1978,6 +2490,12 @@ bool SampleProfileLoader::doInitialization(Module &M,
     ProfileIsCS = true;
     FunctionSamples::ProfileIsCS = true;
 
+    // Enable priority-base inliner and size inline by default for CSSPGO.
+    if (!ProfileSizeInline.getNumOccurrences())
+      ProfileSizeInline = true;
+    if (!CallsitePrioritizedInline.getNumOccurrences())
+      CallsitePrioritizedInline = true;
+
     // Tracker for profiles under different context
     ContextTracker =
         std::make_unique<SampleContextTracker>(Reader->getProfiles());
@@ -2075,6 +2593,7 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
 }
 
 bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
+  LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
   DILocation2SampleMap.clear();
   // By default the entry count is initialized to -1, which will be treated
   // conservatively by getEntryCount as the same as unknown (None). This is
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index 7cecd20b78d8..a885c3ee4ded 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -12,6 +12,7 @@
 
 #include "llvm/Transforms/IPO/SampleProfileProbe.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
@@ -25,8 +26,10 @@
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/CRC.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <unordered_set>
 #include <vector>
 
 using namespace llvm;
@@ -35,6 +38,115 @@ using namespace llvm;
 STATISTIC(ArtificialDbgLine,
           "Number of probes that have an artificial debug line");
 
+static cl::opt<bool>
+    VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden,
+                      cl::desc("Do pseudo probe verification"));
+
+static cl::list<std::string> VerifyPseudoProbeFuncList(
+    "verify-pseudo-probe-funcs", cl::Hidden,
+    cl::desc("The option to specify the name of the functions to verify."));
+
+static cl::opt<bool>
+    UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden,
+                      cl::desc("Update pseudo probe distribution factor"));
+
+bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) {
+  // Skip function declaration.
+  if (F->isDeclaration())
+    return false;
+  // Skip function that will not be emitted into object file. The prevailing
+  // defintion will be verified instead.
+  if (F->hasAvailableExternallyLinkage())
+    return false;
+  // Do a name matching.
+  static std::unordered_set<std::string> VerifyFuncNames(
+      VerifyPseudoProbeFuncList.begin(), VerifyPseudoProbeFuncList.end());
+  return VerifyFuncNames.empty() || VerifyFuncNames.count(F->getName().str());
+}
+
+void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks &PIC) {
+  if (VerifyPseudoProbe) {
+    PIC.registerAfterPassCallback(
+        [this](StringRef P, Any IR, const PreservedAnalyses &) {
+          this->runAfterPass(P, IR);
+        });
+  }
+}
+
+// Callback to run after each transformation for the new pass manager.
+void PseudoProbeVerifier::runAfterPass(StringRef PassID, Any IR) {
+  std::string Banner =
+      "\n*** Pseudo Probe Verification After " + PassID.str() + " ***\n";
+  dbgs() << Banner;
+  if (any_isa<const Module *>(IR))
+    runAfterPass(any_cast<const Module *>(IR));
+  else if (any_isa<const Function *>(IR))
+    runAfterPass(any_cast<const Function *>(IR));
+  else if (any_isa<const LazyCallGraph::SCC *>(IR))
+    runAfterPass(any_cast<const LazyCallGraph::SCC *>(IR));
+  else if (any_isa<const Loop *>(IR))
+    runAfterPass(any_cast<const Loop *>(IR));
+  else
+    llvm_unreachable("Unknown IR unit");
+}
+
+void PseudoProbeVerifier::runAfterPass(const Module *M) {
+  for (const Function &F : *M)
+    runAfterPass(&F);
+}
+
+void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC *C) {
+  for (const LazyCallGraph::Node &N : *C)
+    runAfterPass(&N.getFunction());
+}
+
+void PseudoProbeVerifier::runAfterPass(const Function *F) {
+  if (!shouldVerifyFunction(F))
+    return;
+  ProbeFactorMap ProbeFactors;
+  for (const auto &BB : *F)
+    collectProbeFactors(&BB, ProbeFactors);
+  verifyProbeFactors(F, ProbeFactors);
+}
+
+void PseudoProbeVerifier::runAfterPass(const Loop *L) {
+  const Function *F = L->getHeader()->getParent();
+  runAfterPass(F);
+}
+
+void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block,
+                                              ProbeFactorMap &ProbeFactors) {
+  for (const auto &I : *Block) {
+    if (Optional<PseudoProbe> Probe = extractProbe(I))
+      ProbeFactors[Probe->Id] += Probe->Factor;
+  }
+}
+
+void PseudoProbeVerifier::verifyProbeFactors(
+    const Function *F, const ProbeFactorMap &ProbeFactors) {
+  bool BannerPrinted = false;
+  auto &PrevProbeFactors = FunctionProbeFactors[F->getName()];
+  for (const auto &I : ProbeFactors) {
+    float CurProbeFactor = I.second;
+    if (PrevProbeFactors.count(I.first)) {
+      float PrevProbeFactor = PrevProbeFactors[I.first];
+      if (std::abs(CurProbeFactor - PrevProbeFactor) >
+          DistributionFactorVariance) {
+        if (!BannerPrinted) {
+          dbgs() << "Function " << F->getName() << ":\n";
+          BannerPrinted = true;
+        }
+        dbgs() << "Probe " << I.first << "\tprevious factor "
+               << format("%0.2f", PrevProbeFactor) << "\tcurrent factor "
+               << format("%0.2f", CurProbeFactor) << "\n";
+      }
+    }
+
+    // Update
+    PrevProbeFactors[I.first] = I.second;
+  }
+}
+
 PseudoProbeManager::PseudoProbeManager(const Module &M) {
   if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) {
     for (const auto *Operand : FuncInfo->operands()) {
@@ -201,7 +313,8 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
     Function *ProbeFn =
         llvm::Intrinsic::getDeclaration(M, Intrinsic::pseudoprobe);
     Value *Args[] = {Builder.getInt64(Guid), Builder.getInt64(Index),
-                     Builder.getInt32(0)};
+                     Builder.getInt32(0),
+                     Builder.getInt64(PseudoProbeFullDistributionFactor)};
     auto *Probe = Builder.CreateCall(ProbeFn, Args);
     AssignDebugLoc(Probe);
   }
@@ -219,7 +332,8 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
     // Levarge the 32-bit discriminator field of debug data to store the ID and
     // type of a callsite probe. This gets rid of the dependency on plumbing a
     // customized metadata through the codegen pipeline.
-    uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(Index, Type);
+    uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
+        Index, Type, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor);
     if (auto DIL = Call->getDebugLoc()) {
       DIL = DIL->cloneWithDiscriminator(V);
       Call->setDebugLoc(DIL);
@@ -274,3 +388,47 @@ PreservedAnalyses SampleProfileProbePass::run(Module &M,
 
   return PreservedAnalyses::none();
 }
+
+void PseudoProbeUpdatePass::runOnFunction(Function &F,
+                                          FunctionAnalysisManager &FAM) {
+  BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+  auto BBProfileCount = [&BFI](BasicBlock *BB) {
+    return BFI.getBlockProfileCount(BB)
+               ? BFI.getBlockProfileCount(BB).getValue()
+               : 0;
+  };
+
+  // Collect the sum of execution weight for each probe.
+  ProbeFactorMap ProbeFactors;
+  for (auto &Block : F) {
+    for (auto &I : Block) {
+      if (Optional<PseudoProbe> Probe = extractProbe(I))
+        ProbeFactors[Probe->Id] += BBProfileCount(&Block);
+    }
+  }
+
+  // Fix up over-counted probes.
+  for (auto &Block : F) {
+    for (auto &I : Block) {
+      if (Optional<PseudoProbe> Probe = extractProbe(I)) {
+        float Sum = ProbeFactors[Probe->Id];
+        if (Sum != 0)
+          setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum);
+      }
+    }
+  }
+}
+
+PreservedAnalyses PseudoProbeUpdatePass::run(Module &M,
+                                             ModuleAnalysisManager &AM) {
+  if (UpdatePseudoProbe) {
+    for (auto &F : M) {
+      if (F.isDeclaration())
+        continue;
+      FunctionAnalysisManager &FAM =
+          AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+      runOnFunction(F, FAM);
+    }
+  }
+  return PreservedAnalyses::none();
+}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 0b53007bb6dc..07e68c44416d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1270,6 +1270,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
     ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
     ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
     if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
+        LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType() &&
         (transformZExtICmp(LHS, CI, false) ||
          transformZExtICmp(RHS, CI, false))) {
       // zext (or icmp, icmp) -> or (zext icmp), (zext icmp)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index d687ec654438..b211b0813611 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -592,8 +592,14 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
   BasicBlock::iterator BBI = L->getIterator(), E = L->getParent()->end();
 
   for (++BBI; BBI != E; ++BBI)
-    if (BBI->mayWriteToMemory())
+    if (BBI->mayWriteToMemory()) {
+      // Calls that only access inaccessible memory do not block sinking the
+      // load.
+      if (auto *CB = dyn_cast<CallBase>(BBI))
+        if (CB->onlyAccessesInaccessibleMemory())
+          continue;
       return false;
+    }
 
   // Check for non-address taken alloca.  If not address-taken already, it isn't
   // profitable to do this xform.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index c265516213aa..16efe863779a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -345,10 +345,14 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         return false;
 
       // Get the constant out of the ICmp, if there is one.
+      // Only try this when exactly 1 operand is a constant (if both operands
+      // are constant, the icmp should eventually simplify). Otherwise, we may
+      // invert the transform that reduces set bits and infinite-loop.
+      Value *X;
       const APInt *CmpC;
       ICmpInst::Predicate Pred;
-      if (!match(I->getOperand(0), m_c_ICmp(Pred, m_APInt(CmpC), m_Value())) ||
-          CmpC->getBitWidth() != SelC->getBitWidth())
+      if (!match(I->getOperand(0), m_ICmp(Pred, m_Value(X), m_APInt(CmpC))) ||
+          isa<Constant>(X) || CmpC->getBitWidth() != SelC->getBitWidth())
         return ShrinkDemandedConstant(I, OpNo, DemandedMask);
 
       // If the constant is already the same as the ICmp, leave it as-is.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 518e909e8ab4..828fd49524ec 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3878,9 +3878,10 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
         }
       }
 
-      // Skip processing debug intrinsics in InstCombine. Processing these call instructions
-      // consumes non-trivial amount of time and provides no value for the optimization.
-      if (!isa<DbgInfoIntrinsic>(Inst)) {
+      // Skip processing debug and pseudo intrinsics in InstCombine. Processing
+      // these call instructions consumes non-trivial amount of time and
+      // provides no value for the optimization.
+      if (!Inst->isDebugOrPseudoInst()) {
         InstrsForInstCombineWorklist.push_back(Inst);
         SeenAliasScopes.analyse(Inst);
       }
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
index 2b649732a799..ce4e5e575fbf 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -325,7 +325,7 @@ void AggressiveDeadCodeElimination::initialize() {
 
 bool AggressiveDeadCodeElimination::isAlwaysLive(Instruction &I) {
   // TODO -- use llvm::isInstructionTriviallyDead
-  if (I.isEHPad() || I.mayHaveSideEffects()) {
+  if (I.isEHPad() || I.mayHaveSideEffects() || !I.willReturn()) {
     // Skip any value profile instrumentation calls if they are
     // instrumenting constants.
     if (isInstrumentsConstant(I))
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 96aef90c1c1a..10b08b4e2224 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -2076,6 +2076,15 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
     ValueMapping[PN] = NewPN;
   }
 
+  // Clone noalias scope declarations in the threaded block. When threading a
+  // loop exit, we would otherwise end up with two idential scope declarations
+  // visible at the same time.
+  SmallVector<MDNode *> NoAliasScopes;
+  DenseMap<MDNode *, MDNode *> ClonedScopes;
+  LLVMContext &Context = PredBB->getContext();
+  identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
+  cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);
+
   // Clone the non-phi instructions of the source basic block into NewBB,
   // keeping track of the mapping and using it to remap operands in the cloned
   // instructions.
@@ -2084,6 +2093,7 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
     New->setName(BI->getName());
     NewBB->getInstList().push_back(New);
     ValueMapping[&*BI] = New;
+    adaptNoAliasScopes(New, ClonedScopes, Context);
 
     // Remap operands to patch up intra-block references.
     for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 18717394d384..822a786fc7c7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -1114,12 +1114,16 @@ void LoopUnswitch::emitPreheaderBranchOnCondition(
 
         Loop *L = LI->getLoopFor(I->getParent());
         auto *DefiningAccess = MemA->getDefiningAccess();
-        // If the defining access is a MemoryPhi in the header, get the incoming
-        // value for the pre-header as defining access.
-        if (DefiningAccess->getBlock() == I->getParent()) {
+        // Get the first defining access before the loop.
+        while (L->contains(DefiningAccess->getBlock())) {
+          // If the defining access is a MemoryPhi, get the incoming
+          // value for the pre-header as defining access.
           if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) {
             DefiningAccess =
                 MemPhi->getIncomingValueForBlock(L->getLoopPreheader());
+          } else {
+            DefiningAccess =
+                cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
           }
         }
         MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(),
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
index d111a6ba4241..af510f1a84bf 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2524,7 +2524,7 @@ private:
                                               NewAI.getAlign(), LI.isVolatile(),
                                               LI.getName());
       if (AATags)
-        NewLI->setAAMetadata(AATags);
+        NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
       if (LI.isVolatile())
         NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
       if (NewLI->isAtomic())
@@ -2563,7 +2563,7 @@ private:
           IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
                                 getSliceAlign(), LI.isVolatile(), LI.getName());
       if (AATags)
-        NewLI->setAAMetadata(AATags);
+        NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
       if (LI.isVolatile())
         NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
 
@@ -2626,7 +2626,7 @@ private:
     }
     StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
     if (AATags)
-      Store->setAAMetadata(AATags);
+      Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     Pass.DeadInsts.push_back(&SI);
 
     LLVM_DEBUG(dbgs() << "          to: " << *Store << "\n");
@@ -2650,7 +2650,7 @@ private:
     Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                              LLVMContext::MD_access_group});
     if (AATags)
-      Store->setAAMetadata(AATags);
+      Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     Pass.DeadInsts.push_back(&SI);
     LLVM_DEBUG(dbgs() << "          to: " << *Store << "\n");
     return true;
@@ -2720,7 +2720,7 @@ private:
     NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                              LLVMContext::MD_access_group});
     if (AATags)
-      NewSI->setAAMetadata(AATags);
+      NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     if (SI.isVolatile())
       NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
     if (NewSI->isAtomic())
@@ -2816,7 +2816,7 @@ private:
           getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
           MaybeAlign(getSliceAlign()), II.isVolatile());
       if (AATags)
-        New->setAAMetadata(AATags);
+        New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
       LLVM_DEBUG(dbgs() << "          to: " << *New << "\n");
       return false;
     }
@@ -2885,7 +2885,7 @@ private:
     StoreInst *New =
         IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), II.isVolatile());
     if (AATags)
-      New->setAAMetadata(AATags);
+      New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     LLVM_DEBUG(dbgs() << "          to: " << *New << "\n");
     return !II.isVolatile();
   }
@@ -3006,7 +3006,7 @@ private:
       CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
                                        Size, II.isVolatile());
       if (AATags)
-        New->setAAMetadata(AATags);
+        New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
       LLVM_DEBUG(dbgs() << "          to: " << *New << "\n");
       return false;
     }
@@ -3060,7 +3060,7 @@ private:
       LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
                                              II.isVolatile(), "copyload");
       if (AATags)
-        Load->setAAMetadata(AATags);
+        Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
       Src = Load;
     }
 
@@ -3080,7 +3080,7 @@ private:
     StoreInst *Store = cast<StoreInst>(
         IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
     if (AATags)
-      Store->setAAMetadata(AATags);
+      Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     LLVM_DEBUG(dbgs() << "          to: " << *Store << "\n");
     return !II.isVolatile();
   }
@@ -3381,8 +3381,13 @@ private:
           IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
       LoadInst *Load =
           IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");
-      if (AATags)
-        Load->setAAMetadata(AATags);
+
+      APInt Offset(
+          DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
+      if (AATags &&
+          GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
+        Load->setAAMetadata(AATags.shift(Offset.getZExtValue()));
+
       Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
       LLVM_DEBUG(dbgs() << "          to: " << *Load << "\n");
     }
@@ -3428,8 +3433,13 @@ private:
           IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
       StoreInst *Store =
           IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
-      if (AATags)
-        Store->setAAMetadata(AATags);
+
+      APInt Offset(
+          DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
+      if (AATags &&
+          GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
+        Store->setAAMetadata(AATags.shift(Offset.getZExtValue()));
+
       LLVM_DEBUG(dbgs() << "          to: " << *Store << "\n");
     }
   };
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index f4afa3ad4623..dba5403f272a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -170,16 +170,6 @@ static bool setRetAndArgsNoUndef(Function &F) {
   return setRetNoUndef(F) | setArgsNoUndef(F);
 }
 
-static bool setRetNonNull(Function &F) {
-  assert(F.getReturnType()->isPointerTy() &&
-         "nonnull applies only to pointers");
-  if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NonNull))
-    return false;
-  F.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
-  ++NumNonNull;
-  return true;
-}
-
 static bool setReturnedArg(Function &F, unsigned ArgNo) {
   if (F.hasParamAttribute(ArgNo, Attribute::Returned))
     return false;
@@ -1005,63 +995,6 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
     return Changed;
-  case LibFunc_ZdlPvRKSt9nothrow_t: // delete(void*, nothrow)
-  case LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t: // delete(void*, align_val_t, nothrow)
-  case LibFunc_ZdaPvRKSt9nothrow_t: // delete[](void*, nothrow)
-  case LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t: // delete[](void*, align_val_t, nothrow)
-    Changed |= setDoesNotThrow(F);
-    LLVM_FALLTHROUGH;
-  case LibFunc_ZdlPv: // delete(void*)
-  case LibFunc_ZdlPvj: // delete(void*, unsigned int)
-  case LibFunc_ZdlPvm: // delete(void*, unsigned long)
-  case LibFunc_ZdaPv: // delete[](void*)
-  case LibFunc_ZdaPvj: // delete[](void*, unsigned int)
-  case LibFunc_ZdaPvm: // delete[](void*, unsigned long)
-  case LibFunc_ZdlPvSt11align_val_t: // delete(void*, align_val_t)
-  case LibFunc_ZdlPvjSt11align_val_t: // delete(void*, unsigned int, align_val_t)
-  case LibFunc_ZdlPvmSt11align_val_t: // delete(void*, unsigned long, align_val_t)
-  case LibFunc_ZdaPvSt11align_val_t: // delete[](void*, align_val_t)
-  case LibFunc_ZdaPvjSt11align_val_t: // delete[](void*, unsigned int, align_val_t)
-  case LibFunc_ZdaPvmSt11align_val_t: // delete[](void*, unsigned long, align_val_t);
-    Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
-    Changed |= setArgsNoUndef(F);
-    Changed |= setWillReturn(F);
-    Changed |= setDoesNotCapture(F, 0);
-    return Changed;
-  case LibFunc_ZnwjRKSt9nothrow_t: // new(unsigned int, nothrow)
-  case LibFunc_ZnwmRKSt9nothrow_t: // new(unsigned long, nothrow)
-  case LibFunc_ZnajRKSt9nothrow_t: // new[](unsigned int, nothrow)
-  case LibFunc_ZnamRKSt9nothrow_t: // new[](unsigned long, nothrow)
-  case LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t: // new(unsigned int, align_val_t, nothrow)
-  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t: // new(unsigned long, align_val_t, nothrow)
-  case LibFunc_ZnajSt11align_val_tRKSt9nothrow_t: // new[](unsigned int, align_val_t, nothrow)
-  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t: // new[](unsigned long, align_val_t, nothrow)
-    // Nothrow operator new may return null pointer
-    Changed |= setDoesNotThrow(F);
-    Changed |= setOnlyAccessesInaccessibleMemory(F);
-    Changed |= setRetNoUndef(F);
-    Changed |= setRetDoesNotAlias(F);
-    Changed |= setWillReturn(F);
-    return Changed;
-  case LibFunc_Znwj: // new(unsigned int)
-  case LibFunc_Znwm: // new(unsigned long)
-  case LibFunc_Znaj: // new[](unsigned int)
-  case LibFunc_Znam: // new[](unsigned long)
-  case LibFunc_ZnwjSt11align_val_t: // new(unsigned int, align_val_t)
-  case LibFunc_ZnwmSt11align_val_t: // new(unsigned long, align_val_t)
-  case LibFunc_ZnajSt11align_val_t: // new[](unsigned int, align_val_t)
-  case LibFunc_ZnamSt11align_val_t: // new[](unsigned long, align_val_t)
-  case LibFunc_msvc_new_int: // new(unsigned int)
-  case LibFunc_msvc_new_longlong: // new(unsigned long long)
-  case LibFunc_msvc_new_array_int: // new[](unsigned int)
-  case LibFunc_msvc_new_array_longlong: // new[](unsigned long long)
-    Changed |= setOnlyAccessesInaccessibleMemory(F);
-    // Operator new always returns a nonnull noalias pointer
-    Changed |= setRetNoUndef(F);
-    Changed |= setRetNonNull(F);
-    Changed |= setRetDoesNotAlias(F);
-    Changed |= setWillReturn(F);
-    return Changed;
   // TODO: add LibFunc entries for:
   // case LibFunc_memset_pattern4:
   // case LibFunc_memset_pattern8:
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 51a49574e55d..6ab061510a60 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -989,3 +989,11 @@ void llvm::identifyNoAliasScopesToClone(
       if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
         NoAliasDeclScopes.push_back(Decl->getScopeList());
 }
+
+void llvm::identifyNoAliasScopesToClone(
+    BasicBlock::iterator Start, BasicBlock::iterator End,
+    SmallVectorImpl<MDNode *> &NoAliasDeclScopes) {
+  for (Instruction &I : make_range(Start, End))
+    if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+      NoAliasDeclScopes.push_back(Decl->getScopeList());
+}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 0ac8fa537f4e..3026342cc4a6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -921,14 +921,20 @@ void ScopedAliasMetadataDeepCloner::remap(ValueToValueMapTy &VMap) {
     if (!I)
       continue;
 
+    // Only update scopes when we find them in the map. If they are not, it is
+    // because we already handled that instruction before. This is faster than
+    // tracking which instructions we already updated.
     if (MDNode *M = I->getMetadata(LLVMContext::MD_alias_scope))
-      I->setMetadata(LLVMContext::MD_alias_scope, MDMap[M]);
+      if (MDNode *MNew = MDMap.lookup(M))
+        I->setMetadata(LLVMContext::MD_alias_scope, MNew);
 
     if (MDNode *M = I->getMetadata(LLVMContext::MD_noalias))
-      I->setMetadata(LLVMContext::MD_noalias, MDMap[M]);
+      if (MDNode *MNew = MDMap.lookup(M))
+        I->setMetadata(LLVMContext::MD_noalias, MNew);
 
     if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I))
-      Decl->setScopeList(MDMap[Decl->getScopeList()]);
+      if (MDNode *MNew = MDMap.lookup(Decl->getScopeList()))
+        Decl->setScopeList(MNew);
   }
 }
 
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
index 477ea458c763..ae26058c210c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
@@ -420,13 +420,8 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
     return true;
   }
 
-  if (auto *CB = dyn_cast<CallBase>(I)) {
-    // Treat calls that may not return as alive.
-    // TODO: Remove the intrinsic escape hatch once all intrinsics set
-    // willreturn properly.
-    if (!CB->willReturn() && !isa<IntrinsicInst>(I))
-      return false;
-  }
+  if (!I->willReturn())
+    return false;
 
   if (!I->mayHaveSideEffects())
     return true;
@@ -923,6 +918,7 @@ static void gatherIncomingValuesToPhi(PHINode *PN,
 /// \param IncomingValues A map from block to value.
 static void replaceUndefValuesInPhi(PHINode *PN,
                                     const IncomingValueMap &IncomingValues) {
+  SmallVector<unsigned> TrueUndefOps;
   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
     Value *V = PN->getIncomingValue(i);
 
@@ -930,10 +926,31 @@ static void replaceUndefValuesInPhi(PHINode *PN,
 
     BasicBlock *BB = PN->getIncomingBlock(i);
     IncomingValueMap::const_iterator It = IncomingValues.find(BB);
-    if (It == IncomingValues.end()) continue;
 
+    // Keep track of undef/poison incoming values. Those must match, so we fix
+    // them up below if needed.
+    // Note: this is conservatively correct, but we could try harder and group
+    // the undef values per incoming basic block.
+    if (It == IncomingValues.end()) {
+      TrueUndefOps.push_back(i);
+      continue;
+    }
+
+    // There is a defined value for this incoming block, so map this undef
+    // incoming value to the defined value.
     PN->setIncomingValue(i, It->second);
   }
+
+  // If there are both undef and poison values incoming, then convert those
+  // values to undef. It is invalid to have different values for the same
+  // incoming block.
+  unsigned PoisonCount = count_if(TrueUndefOps, [&](unsigned i) {
+    return isa<PoisonValue>(PN->getIncomingValue(i));
+  });
+  if (PoisonCount != 0 && PoisonCount != TrueUndefOps.size()) {
+    for (unsigned i : TrueUndefOps)
+      PN->setIncomingValue(i, UndefValue::get(PN->getType()));
+  }
 }
 
 /// Replace a value flowing from a block to a phi with
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
index cb5fee7d28e6..befacb591762 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -509,7 +509,7 @@ static void cloneLoopBlocks(
     SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
     SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
     ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
-    LoopInfo *LI) {
+    LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes) {
   BasicBlock *Header = L->getHeader();
   BasicBlock *Latch = L->getLoopLatch();
   BasicBlock *PreHeader = L->getLoopPreheader();
@@ -545,6 +545,15 @@ static void cloneLoopBlocks(
     }
   }
 
+  {
+    // Identify what other metadata depends on the cloned version. After
+    // cloning, replace the metadata with the corrected version for both
+    // memory instructions and noalias intrinsics.
+    std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
+    cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
+                               Header->getContext(), Ext);
+  }
+
   // Recursively create the new Loop objects for nested loops, if any,
   // to preserve LoopInfo.
   for (Loop *ChildLoop : *L) {
@@ -769,13 +778,19 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
   uint64_t ExitWeight = 0, FallThroughWeight = 0;
   initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
 
+  // Identify what noalias metadata is inside the loop: if it is inside the
+  // loop, the associated metadata must be cloned for each iteration.
+  SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
+  identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
+
   // For each peeled-off iteration, make a copy of the loop.
   for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
     SmallVector<BasicBlock *, 8> NewBlocks;
     ValueToValueMapTy VMap;
 
     cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
-                    LoopBlocks, VMap, LVMap, DT, LI);
+                    LoopBlocks, VMap, LVMap, DT, LI,
+                    LoopLocalNoAliasDeclScopes);
 
     // Remap to use values from the current iteration instead of the
     // previous one.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 7cfe17618cde..de9560df9785 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1628,6 +1628,11 @@ static bool canSinkInstructions(
         I->getType()->isTokenTy())
       return false;
 
+    // Do not try to sink an instruction in an infinite loop - it can cause
+    // this algorithm to infinite loop.
+    if (I->getParent()->getSingleSuccessor() == I->getParent())
+      return false;
+
     // Conservatively return false if I is an inline-asm instruction. Sinking
     // and merging inline-asm instructions can potentially create arguments
     // that cannot satisfy the inline-asm constraints.
@@ -1714,13 +1719,13 @@ static bool canSinkInstructions(
   return true;
 }
 
-// Assuming canSinkLastInstruction(Blocks) has returned true, sink the last
+// Assuming canSinkInstructions(Blocks) has returned true, sink the last
 // instruction of every block in Blocks to their common successor, commoning
 // into one instruction.
 static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
   auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
 
-  // canSinkLastInstruction returning true guarantees that every block has at
+  // canSinkInstructions returning true guarantees that every block has at
   // least one non-terminator instruction.
   SmallVector<Instruction*,4> Insts;
   for (auto *BB : Blocks) {
@@ -1733,9 +1738,9 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
   }
 
   // The only checking we need to do now is that all users of all instructions
-  // are the same PHI node. canSinkLastInstruction should have checked this but
-  // it is slightly over-aggressive - it gets confused by commutative instructions
-  // so double-check it here.
+  // are the same PHI node. canSinkInstructions should have checked this but
+  // it is slightly over-aggressive - it gets confused by commutative
+  // instructions so double-check it here.
   Instruction *I0 = Insts.front();
   if (!I0->user_empty()) {
     auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
@@ -1746,11 +1751,11 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
       return false;
   }
 
-  // We don't need to do any more checking here; canSinkLastInstruction should
+  // We don't need to do any more checking here; canSinkInstructions should
   // have done it all for us.
   SmallVector<Value*, 4> NewOperands;
   for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
-    // This check is different to that in canSinkLastInstruction. There, we
+    // This check is different to that in canSinkInstructions. There, we
     // cared about the global view once simplifycfg (and instcombine) have
     // completed - it takes into account PHIs that become trivially
     // simplifiable.  However here we need a more local view; if an operand
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 1795470fa58c..19797e6f7858 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -142,6 +142,10 @@ public:
     return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS});
   }
 
+  VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) {
+    return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal});
+  }
+
   //===--------------------------------------------------------------------===//
   // RAII helpers.
   //===--------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ea0d7673edf6..b456a97aa4ec 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -372,19 +372,11 @@ static Type *getMemInstValueType(Value *I) {
 
 /// A helper function that returns true if the given type is irregular. The
 /// type is irregular if its allocated size doesn't equal the store size of an
-/// element of the corresponding vector type at the given vectorization factor.
-static bool hasIrregularType(Type *Ty, const DataLayout &DL, ElementCount VF) {
-  // Determine if an array of VF elements of type Ty is "bitcast compatible"
-  // with a <VF x Ty> vector.
-  if (VF.isVector()) {
-    auto *VectorTy = VectorType::get(Ty, VF);
-    return TypeSize::get(VF.getKnownMinValue() *
-                             DL.getTypeAllocSize(Ty).getFixedValue(),
-                         VF.isScalable()) != DL.getTypeStoreSize(VectorTy);
-  }
-
-  // If the vectorization factor is one, we just check if an array of type Ty
-  // requires padding between elements.
+/// element of the corresponding vector type.
+static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
+  // Determine if an array of N elements of type Ty is "bitcast compatible"
+  // with a <N x Ty> vector.
+  // This is only true if there is no padding between the array elements.
   return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty);
 }
 
@@ -5212,7 +5204,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
   // requires padding and will be scalarized.
   auto &DL = I->getModule()->getDataLayout();
   auto *ScalarTy = getMemInstValueType(I);
-  if (hasIrregularType(ScalarTy, DL, VF))
+  if (hasIrregularType(ScalarTy, DL))
     return false;
 
   // Check if masking is required.
@@ -5259,7 +5251,7 @@ bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
   // requires padding and will be scalarized.
   auto &DL = I->getModule()->getDataLayout();
   auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType();
-  if (hasIrregularType(ScalarTy, DL, VF))
+  if (hasIrregularType(ScalarTy, DL))
     return false;
 
   return true;
@@ -5504,11 +5496,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
     return None;
   }
 
-  ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF);
-
   switch (ScalarEpilogueStatus) {
   case CM_ScalarEpilogueAllowed:
-    return MaxVF;
+    return computeFeasibleMaxVF(TC, UserVF);
   case CM_ScalarEpilogueNotAllowedUsePredicate:
     LLVM_FALLTHROUGH;
   case CM_ScalarEpilogueNotNeededUsePredicate:
@@ -5546,7 +5536,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
       LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
                            "scalar epilogue instead.\n");
       ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
-      return MaxVF;
+      return computeFeasibleMaxVF(TC, UserVF);
     }
     return None;
   }
@@ -5563,6 +5553,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
     InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
   }
 
+  ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF);
   assert(!MaxVF.isScalable() &&
          "Scalable vectors do not yet support tail folding");
   assert((UserVF.isNonZero() || isPowerOf2_32(MaxVF.getFixedValue())) &&
@@ -8196,8 +8187,15 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst,
   if (BI->getSuccessor(0) != Dst)
     EdgeMask = Builder.createNot(EdgeMask);
 
-  if (SrcMask) // Otherwise block in-mask is all-one, no need to AND.
-    EdgeMask = Builder.createAnd(EdgeMask, SrcMask);
+  if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND.
+    // The condition is 'SrcMask && EdgeMask', which is equivalent to
+    // 'select i1 SrcMask, i1 EdgeMask, i1 false'.
+    // The select version does not introduce new UB if SrcMask is false and
+    // EdgeMask is poison. Using 'and' here introduces undefined behavior.
+    VPValue *False = Plan->getOrAddVPValue(
+        ConstantInt::getFalse(BI->getCondition()->getType()));
+    EdgeMask = Builder.createSelect(SrcMask, EdgeMask, False);
+  }
 
   return EdgeMaskCache[Edge] = EdgeMask;
 }