Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp  2890
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/FastISel.cpp  199
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp  25
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp  421
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h  13
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp  157
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp  90
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp  432
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp  10
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h  15
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp  19
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp  83
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp  617
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp  17
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp  126
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp  17
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp  877
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp  4
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp  887
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h  96
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp  10
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp  183
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp  75
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp  1092
24 files changed, 5946 insertions, 2409 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 654879115ff9..0a3ebd73d272 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20,8 +20,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
@@ -69,9 +67,11 @@
#include <cstdint>
#include <functional>
#include <iterator>
+#include <optional>
#include <string>
#include <tuple>
#include <utility>
+#include <variant>
using namespace llvm;
@@ -135,6 +135,11 @@ static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
cl::desc("DAG combiner enable load/<replace bytes>/store with "
"a narrower store"));
+static cl::opt<bool> EnableVectorFCopySignExtendRound(
+ "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
+ cl::desc(
+ "Enable merging extends and rounds into FCOPYSIGN on vector types"));
+
namespace {
class DAGCombiner {
@@ -246,8 +251,8 @@ namespace {
for (MVT VT : MVT::all_valuetypes())
if (EVT(VT).isSimple() && VT != MVT::Other &&
TLI.isTypeLegal(EVT(VT)) &&
- VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
- MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
+ VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
+ MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
}
void ConsiderForPruning(SDNode *N) {
@@ -382,6 +387,10 @@ namespace {
SDValue PromoteExtend(SDValue Op);
bool PromoteLoad(SDValue Op);
+ SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
+ SDValue RHS, SDValue True, SDValue False,
+ ISD::CondCode CC);
+
/// Call the node-specific routine that knows how to fold each
/// particular type of node. If that doesn't do anything, try the
/// target-specific DAG combines.
@@ -434,6 +443,7 @@ namespace {
SDValue visitOR(SDNode *N);
SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitXOR(SDNode *N);
+ SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
@@ -494,6 +504,8 @@ namespace {
SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
+ bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
+
SDValue visitSTORE(SDNode *N);
SDValue visitLIFETIME_END(SDNode *N);
SDValue visitINSERT_VECTOR_ELT(SDNode *N);
@@ -508,6 +520,8 @@ namespace {
SDValue visitMSTORE(SDNode *N);
SDValue visitMGATHER(SDNode *N);
SDValue visitMSCATTER(SDNode *N);
+ SDValue visitVPGATHER(SDNode *N);
+ SDValue visitVPSCATTER(SDNode *N);
SDValue visitFP_TO_FP16(SDNode *N);
SDValue visitFP16_TO_FP(SDNode *N);
SDValue visitFP_TO_BF16(SDNode *N);
@@ -551,6 +565,7 @@ namespace {
SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
const SDLoc &DL);
SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
+ SDValue foldABSToABD(SDNode *N);
SDValue unfoldMaskedMerge(SDNode *N);
SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
@@ -567,6 +582,7 @@ namespace {
SDValue CombineExtLoad(SDNode *N);
SDValue CombineZExtLogicopShiftLoad(SDNode *N);
SDValue combineRepeatedFPDivisors(SDNode *N);
+ SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
@@ -602,6 +618,7 @@ namespace {
SDValue splitMergedValStore(StoreSDNode *ST);
SDValue TransformFPLoadStorePair(SDNode *N);
SDValue convertBuildVecZextToZext(SDNode *N);
+ SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
SDValue reduceBuildVecTruncToBitCast(SDNode *N);
SDValue reduceBuildVecToShuffle(SDNode *N);
@@ -1204,19 +1221,14 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
- // Replace all uses. If any nodes become isomorphic to other nodes and
- // are deleted, make sure to remove them from our worklist.
- WorklistRemover DeadNodes(*this);
+ // Replace all uses.
DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
// Push the new node and any (possibly new) users onto the worklist.
AddToWorklistWithUsers(TLO.New.getNode());
- // Finally, if the node is now dead, remove it from the graph. The node
- // may not be dead if the replacement process recursively simplified to
- // something else needing this node.
- if (TLO.Old->use_empty())
- deleteAndRecombine(TLO.Old.getNode());
+ // Finally, if the node is now dead, remove it from the graph.
+ recursivelyDeleteUnusedNodes(TLO.Old.getNode());
}
/// Check the specified integer node value to see if it can be simplified or if
@@ -1263,11 +1275,12 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
Trunc.dump(&DAG); dbgs() << '\n');
- WorklistRemover DeadNodes(*this);
+
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
- deleteAndRecombine(Load);
+
AddToWorklist(Trunc.getNode());
+ recursivelyDeleteUnusedNodes(Load);
}
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
@@ -1522,13 +1535,15 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
Result.dump(&DAG); dbgs() << '\n');
- WorklistRemover DeadNodes(*this);
+
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
- deleteAndRecombine(N);
+
AddToWorklist(Result.getNode());
+ recursivelyDeleteUnusedNodes(N);
return true;
}
+
return false;
}
@@ -1746,7 +1761,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::AssertAlign: return visitAssertAlign(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::SIGN_EXTEND_VECTOR_INREG:
- case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
case ISD::TRUNCATE: return visitTRUNCATE(N);
case ISD::BITCAST: return visitBITCAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
@@ -1964,7 +1980,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
Changed = true;
break;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
default:
// Only add if it isn't already in the list.
@@ -2187,54 +2203,29 @@ static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
std::swap(N0, N1);
// TODO: Should this apply to scalar select too?
- if (!N1.hasOneUse() || N1.getOpcode() != ISD::VSELECT)
+ if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
return SDValue();
+ // We can't hoist div/rem because of immediate UB (not speculatable).
unsigned Opcode = N->getOpcode();
+ if (!DAG.isSafeToSpeculativelyExecute(Opcode))
+ return SDValue();
+
EVT VT = N->getValueType(0);
SDValue Cond = N1.getOperand(0);
SDValue TVal = N1.getOperand(1);
SDValue FVal = N1.getOperand(2);
- // TODO: The cases should match with IR's ConstantExpr::getBinOpIdentity().
- // TODO: Target-specific opcodes could be added. Ex: "isCommutativeBinOp()".
- // TODO: With fast-math (NSZ), allow the opposite-sign form of zero?
- auto isIdentityConstantForOpcode = [](unsigned Opcode, SDValue V) {
- if (ConstantFPSDNode *C = isConstOrConstSplatFP(V)) {
- switch (Opcode) {
- case ISD::FADD: // X + -0.0 --> X
- return C->isZero() && C->isNegative();
- case ISD::FSUB: // X - 0.0 --> X
- return C->isZero() && !C->isNegative();
- case ISD::FMUL: // X * 1.0 --> X
- case ISD::FDIV: // X / 1.0 --> X
- return C->isExactlyValue(1.0);
- }
- }
- if (ConstantSDNode *C = isConstOrConstSplat(V)) {
- switch (Opcode) {
- case ISD::ADD: // X + 0 --> X
- case ISD::SUB: // X - 0 --> X
- case ISD::SHL: // X << 0 --> X
- case ISD::SRA: // X s>> 0 --> X
- case ISD::SRL: // X u>> 0 --> X
- return C->isZero();
- case ISD::MUL: // X * 1 --> X
- return C->isOne();
- }
- }
- return false;
- };
-
// This transform increases uses of N0, so freeze it to be safe.
// binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
- if (isIdentityConstantForOpcode(Opcode, TVal)) {
+ unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
+ if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
SDValue F0 = DAG.getFreeze(N0);
SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
}
// binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
- if (isIdentityConstantForOpcode(Opcode, FVal)) {
+ if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
SDValue F0 = DAG.getFreeze(N0);
SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
@@ -2289,8 +2280,8 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
// or X, (select Cond, -1, 0) --> select Cond, -1, X
bool CanFoldNonConst =
(BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
- (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
- (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
+ ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
+ (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
SDValue CBO = BO->getOperand(SelOpNo ^ 1);
if (!CanFoldNonConst &&
@@ -2298,23 +2289,41 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
!DAG.isConstantFPBuildVectorOrConstantFP(CBO))
return SDValue();
- // We have a select-of-constants followed by a binary operator with a
- // constant. Eliminate the binop by pulling the constant math into the select.
- // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
SDLoc DL(Sel);
- SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
- : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
- if (!CanFoldNonConst && !NewCT.isUndef() &&
- !isConstantOrConstantVector(NewCT, true) &&
- !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
- return SDValue();
+ SDValue NewCT, NewCF;
- SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
- : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
- if (!CanFoldNonConst && !NewCF.isUndef() &&
- !isConstantOrConstantVector(NewCF, true) &&
- !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
- return SDValue();
+ if (CanFoldNonConst) {
+ // If CBO is an opaque constant, we can't rely on getNode to constant fold.
+ if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
+ (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
+ NewCT = CT;
+ else
+ NewCT = CBO;
+
+ if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
+ (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
+ NewCF = CF;
+ else
+ NewCF = CBO;
+ } else {
+ // We have a select-of-constants followed by a binary operator with a
+ // constant. Eliminate the binop by pulling the constant math into the
+ // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
+ // CBO, CF + CBO
+ NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
+ : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
+ if (!CanFoldNonConst && !NewCT.isUndef() &&
+ !isConstantOrConstantVector(NewCT, true) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
+ return SDValue();
+
+ NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
+ : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
+ if (!CanFoldNonConst && !NewCF.isUndef() &&
+ !isConstantOrConstantVector(NewCF, true) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
+ return SDValue();
+ }
SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
SelectOp->setFlags(BO->getFlags());
@@ -2668,9 +2677,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
}
// fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
- if ((N0.getOpcode() == ISD::ADD) &&
- (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
- (N1.getOpcode() == ISD::VSCALE)) {
+ if (N0.getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOpcode() == ISD::VSCALE &&
+ N1.getOpcode() == ISD::VSCALE) {
const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
const APInt &VS1 = N1->getConstantOperandAPInt(0);
SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
@@ -2687,9 +2696,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
}
// Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
- if ((N0.getOpcode() == ISD::ADD) &&
- (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
- (N1.getOpcode() == ISD::STEP_VECTOR)) {
+ if (N0.getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
+ N1.getOpcode() == ISD::STEP_VECTOR) {
const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
const APInt &SV1 = N1->getConstantOperandAPInt(0);
APInt NewStep = SV0 + SV1;
@@ -2789,16 +2798,26 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
/// the opcode and bypass the mask operation.
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
SelectionDAG &DAG, const SDLoc &DL) {
+ if (N1.getOpcode() == ISD::ZERO_EXTEND)
+ N1 = N1.getOperand(0);
+
if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
return SDValue();
EVT VT = N0.getValueType();
- if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
+ N10 = N10.getOperand(0);
+
+ if (N10.getValueType() != VT)
+ return SDValue();
+
+ if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
return SDValue();
// add N0, (and (AssertSext X, i1), 1) --> sub N0, X
// sub N0, (and (AssertSext X, i1), 1) --> add N0, X
- return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
+ return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
}
/// Helper for doing combines based on N0 and N1 being added to each other.
@@ -3079,6 +3098,17 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
return Combined;
+ // We want to avoid useless duplication.
+ // TODO: This is done automatically for binary operations. As ADDCARRY is
+ // not a binary operation, it is not really possible to leverage this
+ // existing mechanism for it. However, if more operations require the same
+ // deduplication logic, then it may be worth generalizing.
+ SDValue Ops[] = {N1, N0, CarryIn};
+ SDNode *CSENode =
+ DAG.getNodeIfExists(ISD::ADDCARRY, N->getVTList(), Ops, N->getFlags());
+ if (CSENode)
+ return SDValue(CSENode, 0);
+
return SDValue();
}
@@ -3110,7 +3140,7 @@ SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
* (addcarry X, 0, (addcarry A, B, Z):Carry)
*
* The end result is usually an increase in operation required, but because the
- * carry is now linearized, other tranforms can kick in and optimize the DAG.
+ * carry is now linearized, other transforms can kick in and optimize the DAG.
*
* Patterns typically look something like
* (uaddo A, B)
@@ -3492,11 +3522,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (VT.isVector()) {
SDValue N1S = DAG.getSplatValue(N1, true);
if (N1S && N1S.getOpcode() == ISD::SUB &&
- isNullConstant(N1S.getOperand(0))) {
- if (VT.isScalableVector())
- return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
- return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
- }
+ isNullConstant(N1S.getOperand(0)))
+ return DAG.getSplat(VT, DL, N1S.getOperand(1));
}
}
@@ -3625,7 +3652,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return V;
// (x - y) - 1 -> add (xor y, -1), x
- if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
+ if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isOneOrOneSplat(N1)) {
SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
@@ -3642,26 +3669,26 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// Hoist one-use addition by non-opaque constant:
// (x + C) - y -> (x - y) + C
- if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
+ if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
}
// y - (x + C) -> (y - x) - C
- if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
+ if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
}
// (x - C) - y -> (x - y) - C
// This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
- if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
}
// (C - x) - y -> C - (x + y)
- if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
@@ -3716,7 +3743,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
// canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
- if (N1.getOpcode() == ISD::VSCALE) {
+ if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
const APInt &IntVal = N1.getConstantOperandAPInt(0);
return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
}
@@ -3749,6 +3776,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
}
+ // (sub (subcarry X, 0, Carry), Y) -> (subcarry X, Y, Carry)
+ if (N0.getOpcode() == ISD::SUBCARRY && isNullConstant(N0.getOperand(1)) &&
+ N0.getResNo() == 0 && N0.hasOneUse())
+ return DAG.getNode(ISD::SUBCARRY, DL, N0->getVTList(),
+ N0.getOperand(0), N1, N0.getOperand(2));
+
if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
// (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
if (SDValue Carry = getAsCarry(TLI, N0)) {
@@ -3772,6 +3805,24 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
}
+ // max(a,b) - min(a,b) --> abd(a,b)
+ auto MatchSubMaxMin = [&](unsigned Max, unsigned Min, unsigned Abd) {
+ if (N0.getOpcode() != Max || N1.getOpcode() != Min)
+ return SDValue();
+ if ((N0.getOperand(0) != N1.getOperand(0) ||
+ N0.getOperand(1) != N1.getOperand(1)) &&
+ (N0.getOperand(0) != N1.getOperand(1) ||
+ N0.getOperand(1) != N1.getOperand(0)))
+ return SDValue();
+ if (!TLI.isOperationLegalOrCustom(Abd, VT))
+ return SDValue();
+ return DAG.getNode(Abd, DL, VT, N0.getOperand(0), N0.getOperand(1));
+ };
+ if (SDValue R = MatchSubMaxMin(ISD::SMAX, ISD::SMIN, ISD::ABDS))
+ return R;
+ if (SDValue R = MatchSubMaxMin(ISD::UMAX, ISD::UMIN, ISD::ABDU))
+ return R;
+
return SDValue();
}
@@ -3996,8 +4047,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold (mul x, -1) -> 0-x
if (N1IsConst && ConstValue1.isAllOnes())
- return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(0, DL, VT), N0);
+ return DAG.getNegative(N0, DL, VT);
// fold (mul x, (1 << c)) -> x << c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
@@ -4021,6 +4071,21 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
getShiftAmountTy(N0.getValueType()))));
}
+ // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
+ // hi result is in use in case we hit this mid-legalization.
+ for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
+ SDVTList LoHiVT = DAG.getVTList(VT, VT);
+ // TODO: Can we match commutable operands with getNodeIfExists?
+ if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
+ if (LoHi->hasAnyUseOfValue(1))
+ return SDValue(LoHi, 0);
+ if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
+ if (LoHi->hasAnyUseOfValue(1))
+ return SDValue(LoHi, 0);
+ }
+ }
+
// Try to transform:
// (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
// mul x, (2^N + 1) --> add (shl x, N), x
@@ -4064,7 +4129,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
DAG.getConstant(TZeros, DL, VT)))
: DAG.getNode(MathOp, DL, VT, Shl, N0);
if (ConstValue1.isNegative())
- R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
+ R = DAG.getNegative(R, DL, VT);
return R;
}
}
@@ -4108,21 +4173,21 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
// Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
- if (N0.getOpcode() == ISD::VSCALE)
- if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
- const APInt &C0 = N0.getConstantOperandAPInt(0);
- const APInt &C1 = NC1->getAPIntValue();
- return DAG.getVScale(DL, VT, C0 * C1);
- }
+ ConstantSDNode *NC1 = isConstOrConstSplat(N1);
+ if (N0.getOpcode() == ISD::VSCALE && NC1) {
+ const APInt &C0 = N0.getConstantOperandAPInt(0);
+ const APInt &C1 = NC1->getAPIntValue();
+ return DAG.getVScale(DL, VT, C0 * C1);
+ }
// Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
APInt MulVal;
- if (N0.getOpcode() == ISD::STEP_VECTOR)
- if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
- const APInt &C0 = N0.getConstantOperandAPInt(0);
- APInt NewStep = C0 * MulVal;
- return DAG.getStepVector(DL, VT, NewStep);
- }
+ if (N0.getOpcode() == ISD::STEP_VECTOR &&
+ ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
+ const APInt &C0 = N0.getConstantOperandAPInt(0);
+ APInt NewStep = C0 * MulVal;
+ return DAG.getStepVector(DL, VT, NewStep);
+ }
// Fold ((mul x, 0/undef) -> 0,
// (mul x, 1) -> x) -> x)
@@ -4318,7 +4383,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// fold (sdiv X, -1) -> 0-X
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N1C && N1C->isAllOnes())
- return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
+ return DAG.getNegative(N0, DL, VT);
// fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
if (N1C && N1C->getAPIntValue().isMinSignedValue())
@@ -4465,10 +4530,11 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
// fold (udiv X, -1) -> select(X == -1, 1, 0)
ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N1C && N1C->isAllOnes())
+ if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(1, DL, VT),
DAG.getConstant(0, DL, VT));
+ }
if (SDValue V = simplifyDivRem(N, DAG))
return V;
@@ -4571,7 +4637,8 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
// fold (urem X, -1) -> select(FX == -1, 0, FX)
// Freeze the numerator to avoid a miscompile with an undefined value.
- if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false)) {
+ if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
+ CCVT.isVector() == VT.isVector()) {
SDValue F0 = DAG.getFreeze(N0);
SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
@@ -5328,6 +5395,21 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
return DAG.getNode(HandOpcode, DL, VT, Logic);
}
+ // For funnel shifts FSHL/FSHR:
+ // logic_op (OP x, x1, s), (OP y, y1, s) -->
+ // --> OP (logic_op x, y), (logic_op x1, y1), s
+ if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
+ N0.getOperand(2) == N1.getOperand(2)) {
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+ SDValue X1 = N0.getOperand(1);
+ SDValue Y1 = N1.getOperand(1);
+ SDValue S = N0.getOperand(2);
+ SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
+ SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
+ return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
+ }
+
// Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
// Only perform this optimization up until type legalization, before
// LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
@@ -6139,6 +6221,43 @@ static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
}
+/// Given a tree of logic operations with shape like
+/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
+/// try to match and fold shift operations with the same shift amount.
+/// For example:
+/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
+/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
+static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
+ SDValue RightHand, SelectionDAG &DAG) {
+ unsigned LogicOpcode = N->getOpcode();
+ assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
+ LogicOpcode == ISD::XOR));
+ if (LeftHand.getOpcode() != LogicOpcode ||
+ RightHand.getOpcode() != LogicOpcode)
+ return SDValue();
+ if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
+ return SDValue();
+
+ // Try to match one of the following patterns:
+ // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
+ // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
+ // Note that foldLogicOfShifts will handle commuted versions of the left hand
+ // itself.
+ SDValue CombinedShifts, W;
+ SDValue R0 = RightHand.getOperand(0);
+ SDValue R1 = RightHand.getOperand(1);
+ if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
+ W = R1;
+ else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
+ W = R0;
+ else
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
+}
+
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -6175,8 +6294,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
- if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && N0.hasOneUse() &&
- Splat && N1.hasOneUse()) {
+ if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat &&
+ N1.hasOneUse()) {
EVT LoadVT = MLoad->getMemoryVT();
EVT ExtVT = VT;
if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
@@ -6186,11 +6305,16 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
uint64_t ElementSize =
LoadVT.getVectorElementType().getScalarSizeInBits();
if (Splat->getAPIntValue().isMask(ElementSize)) {
- return DAG.getMaskedLoad(
+ auto NewLoad = DAG.getMaskedLoad(
ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
ISD::ZEXTLOAD, MLoad->isExpandingLoad());
+ bool LoadHasOtherUsers = !N0.hasOneUse();
+ CombineTo(N, NewLoad);
+ if (LoadHasOtherUsers)
+ CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
+ return SDValue(N, 0);
}
}
}
@@ -6213,14 +6337,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
return RAND;
- // Try to convert a constant mask AND into a shuffle clear mask.
- if (VT.isVector())
- if (SDValue Shuffle = XformToShuffleWithZero(N))
- return Shuffle;
-
- if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
- return Combined;
-
// fold (and (or x, C), D) -> D if (C & D) == D
auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
@@ -6228,23 +6344,32 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N0.getOpcode() == ISD::OR &&
ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
return N1;
+
// fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N0Op0 = N0.getOperand(0);
APInt Mask = ~N1C->getAPIntValue();
Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
- if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
- SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
- N0.getValueType(), N0Op0);
-
- // Replace uses of the AND with uses of the Zero extend node.
- CombineTo(N, Zext);
+ if (DAG.MaskedValueIsZero(N0Op0, Mask))
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N0.getValueType(), N0Op0);
+ }
- // We actually want to replace all uses of the any_extend with the
- // zero_extend, to avoid duplicating things. This will later cause this
- // AND to be folded.
- CombineTo(N0.getNode(), Zext);
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
+ if (ISD::isExtOpcode(N0.getOpcode())) {
+ unsigned ExtOpc = N0.getOpcode();
+ SDValue N0Op0 = N0.getOperand(0);
+ if (N0Op0.getOpcode() == ISD::AND &&
+ (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N0Op0.getOperand(1)) &&
+ N0->hasOneUse() && N0Op0->hasOneUse()) {
+ SDLoc DL(N);
+ SDValue NewMask =
+ DAG.getNode(ISD::AND, DL, VT, N1,
+ DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1)));
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
+ NewMask);
}
}
@@ -6353,6 +6478,33 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
+ // Try to convert a constant mask AND into a shuffle clear mask.
+ if (VT.isVector())
+ if (SDValue Shuffle = XformToShuffleWithZero(N))
+ return Shuffle;
+
+ if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
+ return Combined;
+
+ if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
+ ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
+ SDValue Ext = N0.getOperand(0);
+ EVT ExtVT = Ext->getValueType(0);
+ SDValue Extendee = Ext->getOperand(0);
+
+ unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
+ if (N1C->getAPIntValue().isMask(ScalarWidth) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
+ // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
+ // => (extract_subvector (iN_zeroext v))
+ SDValue ZeroExtExtendee =
+ DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), ExtVT, Extendee);
+
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, ZeroExtExtendee,
+ N0.getOperand(1));
+ }
+ }
+
// fold (and (masked_gather x)) -> (zext_masked_gather x)
if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
EVT MemVT = GN0->getMemoryVT();
@@ -6493,6 +6645,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue V = foldAndToUsubsat(N, DAG))
return V;
+ // Postpone until legalization is completed to avoid interference with bswap
+ // folding.
+ if (LegalOperations || VT.isVector())
+ if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
+ return R;
+
return SDValue();
}
@@ -6892,6 +7050,10 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
+ // fold or (and x, y), x --> x
+ if (N00 == N1 || N01 == N1)
+ return N1;
+
// fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
// TODO: Set AllowUndefs = true.
if (getBitwiseNotOperand(N01, N00,
@@ -6904,6 +7066,24 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1);
}
+ if (N0.getOpcode() == ISD::XOR) {
+ // fold or (xor x, y), x --> or x, y
+ // or (xor x, y), (x and/or y) --> or x, y
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ if (N00 == N1)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1);
+ if (N01 == N1)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1);
+
+ if (N1.getOpcode() == ISD::AND || N1.getOpcode() == ISD::OR) {
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+ if ((N00 == N10 && N01 == N11) || (N00 == N11 && N01 == N10))
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N01);
+ }
+ }
+
if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
return R;
@@ -7093,10 +7273,17 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDValue Combined = visitADDLike(N))
return Combined;
+ // Postpone until legalization is completed to avoid interference with bswap
+ // folding.
+ if (LegalOperations || VT.isVector())
+ if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
+ return R;
+
return SDValue();
}
-static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
+static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
+ SDValue &Mask) {
if (Op.getOpcode() == ISD::AND &&
DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
Mask = Op.getOperand(1);
@@ -7106,7 +7293,7 @@ static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
}
/// Match "(X shl/srl V1) & V2" where V2 may not be present.
-static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
+static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
SDValue &Mask) {
Op = stripConstantMask(DAG, Op, Mask);
if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
@@ -7144,9 +7331,8 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
SDValue ExtractFrom, SDValue &Mask,
const SDLoc &DL) {
assert(OppShift && ExtractFrom && "Empty SDValue");
- assert(
- (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
- "Existing shift must be valid as a rotate half");
+ if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
+ return SDValue();
ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
@@ -7301,12 +7487,14 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
unsigned MaskLoBits = 0;
if (IsRotate && isPowerOf2_64(EltSize)) {
unsigned Bits = Log2_64(EltSize);
- APInt DemandedBits =
- APInt::getLowBitsSet(Neg.getScalarValueSizeInBits(), Bits);
- if (SDValue Inner =
- TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
- Neg = Inner;
- MaskLoBits = Bits;
+ unsigned NegBits = Neg.getScalarValueSizeInBits();
+ if (NegBits >= Bits) {
+ APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
+ if (SDValue Inner =
+ TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
+ Neg = Inner;
+ MaskLoBits = Bits;
+ }
}
}
@@ -7322,11 +7510,13 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
// affect Mask's demanded bits, just replace Pos with Pos'. These operations
// are redundant for the purpose of the equality.
if (MaskLoBits) {
- APInt DemandedBits =
- APInt::getLowBitsSet(Pos.getScalarValueSizeInBits(), MaskLoBits);
- if (SDValue Inner =
- TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
- Pos = Inner;
+ unsigned PosBits = Pos.getScalarValueSizeInBits();
+ if (PosBits >= MaskLoBits) {
+ APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
+ if (SDValue Inner =
+ TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
+ Pos = Inner;
+ }
}
}
@@ -7551,6 +7741,10 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
std::swap(LHSMask, RHSMask);
}
+ // Something has gone wrong - we've lost the shl/srl pair - bail.
+ if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
+ return SDValue();
+
unsigned EltSizeInBits = VT.getScalarSizeInBits();
SDValue LHSShiftArg = LHSShift.getOperand(0);
SDValue LHSShiftAmt = LHSShift.getOperand(1);
@@ -7586,7 +7780,7 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
};
// TODO: Support pre-legalization funnel-shift by constant.
- bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
+ bool IsRotate = LHSShiftArg == RHSShiftArg;
if (!IsRotate && !(HasFSHL || HasFSHR)) {
if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
@@ -7714,87 +7908,135 @@ struct ByteProvider {
// ByteOffset is the offset of the byte in the value produced by the load.
LoadSDNode *Load = nullptr;
unsigned ByteOffset = 0;
+ unsigned VectorOffset = 0;
ByteProvider() = default;
- static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
- return ByteProvider(Load, ByteOffset);
+ static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset,
+ unsigned VectorOffset) {
+ return ByteProvider(Load, ByteOffset, VectorOffset);
}
- static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
+ static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0, 0); }
bool isConstantZero() const { return !Load; }
bool isMemory() const { return Load; }
bool operator==(const ByteProvider &Other) const {
- return Other.Load == Load && Other.ByteOffset == ByteOffset;
+ return Other.Load == Load && Other.ByteOffset == ByteOffset &&
+ Other.VectorOffset == VectorOffset;
}
private:
- ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
- : Load(Load), ByteOffset(ByteOffset) {}
+ ByteProvider(LoadSDNode *Load, unsigned ByteOffset, unsigned VectorOffset)
+ : Load(Load), ByteOffset(ByteOffset), VectorOffset(VectorOffset) {}
};
} // end anonymous namespace
/// Recursively traverses the expression calculating the origin of the requested
-/// byte of the given value. Returns None if the provider can't be calculated.
+/// byte of the given value. Returns std::nullopt if the provider can't be
+/// calculated.
+///
+/// For all the values except the root of the expression, we verify that the
+/// value has exactly one use and if not then return std::nullopt. This way if
+/// the origin of the byte is returned it's guaranteed that the values which
+/// contribute to the byte are not used outside of this expression.
+
+/// However, there is a special case when dealing with vector loads -- we allow
+/// more than one use if the load is a vector type. Since the values that
+/// contribute to the byte ultimately come from the ExtractVectorElements of the
+/// Load, we don't care if the Load has uses other than ExtractVectorElements,
+/// because those operations are independent from the pattern to be combined.
+/// For vector loads, we simply care that the ByteProviders are adjacent
+/// positions of the same vector, and their index matches the byte that is being
+/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
+/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
+/// byte position we are trying to provide for the LoadCombine. If these do
+/// not match, then we can not combine the vector loads. \p Index uses the
+/// byte position we are trying to provide for and is matched against the
+/// shl and load size. The \p Index algorithm ensures the requested byte is
+/// provided for by the pattern, and the pattern does not over provide bytes.
///
-/// For all the values except the root of the expression verifies that the value
-/// has exactly one use and if it's not true return None. This way if the origin
-/// of the byte is returned it's guaranteed that the values which contribute to
-/// the byte are not used outside of this expression.
///
-/// Because the parts of the expression are not allowed to have more than one
-/// use this function iterates over trees, not DAGs. So it never visits the same
-/// node more than once.
-static const Optional<ByteProvider>
+/// The supported LoadCombine pattern for vector loads is as follows
+/// or
+/// / \
+/// or shl
+/// / \ |
+/// or shl zext
+/// / \ | |
+/// shl zext zext EVE*
+/// | | | |
+/// zext EVE* EVE* LOAD
+/// | | |
+/// EVE* LOAD LOAD
+/// |
+/// LOAD
+///
+/// *ExtractVectorElement
+static const std::optional<ByteProvider>
calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
- bool Root = false) {
+ std::optional<uint64_t> VectorIndex,
+ unsigned StartingIndex = 0) {
+
// Typical i64 by i8 pattern requires recursion up to 8 calls depth
if (Depth == 10)
- return None;
+ return std::nullopt;
+
+ // Only allow multiple uses if the instruction is a vector load (in which
+ // case we will use the load for every ExtractVectorElement)
+ if (Depth && !Op.hasOneUse() &&
+ (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
+ return std::nullopt;
- if (!Root && !Op.hasOneUse())
- return None;
+ // Fail to combine if we have encountered anything but a LOAD after handling
+ // an ExtractVectorElement.
+ if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
+ return std::nullopt;
- assert(Op.getValueType().isScalarInteger() && "can't handle other types");
unsigned BitWidth = Op.getValueSizeInBits();
if (BitWidth % 8 != 0)
- return None;
+ return std::nullopt;
unsigned ByteWidth = BitWidth / 8;
assert(Index < ByteWidth && "invalid index requested");
(void) ByteWidth;
switch (Op.getOpcode()) {
case ISD::OR: {
- auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
+ auto LHS =
+ calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
if (!LHS)
- return None;
- auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
+ return std::nullopt;
+ auto RHS =
+ calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
if (!RHS)
- return None;
+ return std::nullopt;
if (LHS->isConstantZero())
return RHS;
if (RHS->isConstantZero())
return LHS;
- return None;
+ return std::nullopt;
}
case ISD::SHL: {
auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
if (!ShiftOp)
- return None;
+ return std::nullopt;
uint64_t BitShift = ShiftOp->getZExtValue();
+
if (BitShift % 8 != 0)
- return None;
+ return std::nullopt;
uint64_t ByteShift = BitShift / 8;
+ // If we are shifting by an amount greater than the index we are trying to
+ // provide, then do not provide anything. Otherwise, subtract the shift
+ // amount from the index.
return Index < ByteShift
? ByteProvider::getConstantZero()
: calculateByteProvider(Op->getOperand(0), Index - ByteShift,
- Depth + 1);
+ Depth + 1, VectorIndex, Index);
}
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
@@ -7802,37 +8044,70 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
SDValue NarrowOp = Op->getOperand(0);
unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
if (NarrowBitWidth % 8 != 0)
- return None;
+ return std::nullopt;
uint64_t NarrowByteWidth = NarrowBitWidth / 8;
if (Index >= NarrowByteWidth)
return Op.getOpcode() == ISD::ZERO_EXTEND
- ? Optional<ByteProvider>(ByteProvider::getConstantZero())
- : None;
- return calculateByteProvider(NarrowOp, Index, Depth + 1);
+ ? std::optional<ByteProvider>(ByteProvider::getConstantZero())
+ : std::nullopt;
+ return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
+ StartingIndex);
}
case ISD::BSWAP:
return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
- Depth + 1);
+ Depth + 1, VectorIndex, StartingIndex);
+ case ISD::EXTRACT_VECTOR_ELT: {
+ auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+ if (!OffsetOp)
+ return std::nullopt;
+
+ VectorIndex = OffsetOp->getZExtValue();
+
+ SDValue NarrowOp = Op->getOperand(0);
+ unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
+ if (NarrowBitWidth % 8 != 0)
+ return std::nullopt;
+ uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+
+ // Check to see if the position of the element in the vector corresponds
+ // with the byte we are trying to provide for. In the case of a vector of
+ // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
+ // the element will provide a range of bytes. For example, if we have a
+ // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
+ // 3).
+ if (*VectorIndex * NarrowByteWidth > StartingIndex)
+ return std::nullopt;
+ if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
+ return std::nullopt;
+
+ return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
+ VectorIndex, StartingIndex);
+ }
case ISD::LOAD: {
auto L = cast<LoadSDNode>(Op.getNode());
if (!L->isSimple() || L->isIndexed())
- return None;
+ return std::nullopt;
unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
if (NarrowBitWidth % 8 != 0)
- return None;
+ return std::nullopt;
uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+ // If the width of the load does not reach the byte we are trying to provide
+ // for and it is not a ZEXTLOAD, then the load does not provide for the byte
+ // in question.
if (Index >= NarrowByteWidth)
return L->getExtensionType() == ISD::ZEXTLOAD
- ? Optional<ByteProvider>(ByteProvider::getConstantZero())
- : None;
- return ByteProvider::getMemory(L, Index);
+ ? std::optional<ByteProvider>(ByteProvider::getConstantZero())
+ : std::nullopt;
+
+ unsigned BPVectorIndex = VectorIndex.value_or(0U);
+ return ByteProvider::getMemory(L, Index, BPVectorIndex);
}
}
- return None;
+ return std::nullopt;
}
static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
@@ -7845,13 +8120,13 @@ static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
// Check if the bytes offsets we are looking at match with either big or
// little endian value loaded. Return true for big endian, false for little
-// endian, and None if match failed.
-static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
- int64_t FirstOffset) {
+// endian, and std::nullopt if match failed.
+static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
+ int64_t FirstOffset) {
// The endian can be decided only when it is 2 bytes at least.
unsigned Width = ByteOffsets.size();
if (Width < 2)
- return None;
+ return std::nullopt;
bool BigEndian = true, LittleEndian = true;
for (unsigned i = 0; i < Width; i++) {
@@ -7859,7 +8134,7 @@ static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
if (!BigEndian && !LittleEndian)
- return None;
+ return std::nullopt;
}
assert((BigEndian != LittleEndian) && "It should be either big endian or"
@@ -7922,9 +8197,13 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
// All stores must be the same size to ensure that we are writing all of the
// bytes in the wide value.
+ // This store should have exactly one use as a chain operand for another
+ // store in the merging set. If there are other chain uses, then the
+ // transform may not be safe because the order of loads/stores outside of
+ // this set may not be preserved.
// TODO: We could allow multiple sizes by tracking each stored byte.
if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
- Store->isIndexed())
+ Store->isIndexed() || !Store->hasOneUse())
return SDValue();
Stores.push_back(Store);
Chain = Store->getChain();
@@ -7948,7 +8227,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
int64_t FirstOffset = INT64_MAX;
StoreSDNode *FirstStore = nullptr;
- Optional<BaseIndexOffset> Base;
+ std::optional<BaseIndexOffset> Base;
for (auto *Store : Stores) {
// All the stores store different parts of the CombinedValue. A truncate is
// required to get the partial value.
@@ -8016,7 +8295,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
// Check that a store of the wide type is both allowed and fast on the target
const DataLayout &Layout = DAG.getDataLayout();
- bool Fast = false;
+ unsigned Fast = 0;
bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
*FirstStore->getMemOperand(), &Fast);
if (!Allowed || !Fast)
@@ -8120,7 +8399,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
auto MemoryByteOffset = [&] (ByteProvider P) {
assert(P.isMemory() && "Must be a memory byte provider");
- unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
+ unsigned LoadBitWidth = P.Load->getMemoryVT().getScalarSizeInBits();
+
assert(LoadBitWidth % 8 == 0 &&
"can only analyze providers for individual bytes not bit");
unsigned LoadByteWidth = LoadBitWidth / 8;
@@ -8129,11 +8409,11 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
: littleEndianByteAt(LoadByteWidth, P.ByteOffset);
};
- Optional<BaseIndexOffset> Base;
+ std::optional<BaseIndexOffset> Base;
SDValue Chain;
SmallPtrSet<LoadSDNode *, 8> Loads;
- Optional<ByteProvider> FirstByteProvider;
+ std::optional<ByteProvider> FirstByteProvider;
int64_t FirstOffset = INT64_MAX;
// Check if all the bytes of the OR we are looking at are loaded from the same
@@ -8141,7 +8421,9 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
unsigned ZeroExtendedBytes = 0;
for (int i = ByteWidth - 1; i >= 0; --i) {
- auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
+ auto P =
+ calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
+ /*StartingIndex*/ i);
if (!P)
return SDValue();
@@ -8155,10 +8437,6 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
assert(P->isMemory() && "provenance should either be memory or zero");
LoadSDNode *L = P->Load;
- assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
- !L->isIndexed() &&
- "Must be enforced by calculateByteProvider");
- assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
// All loads must share the same chain
SDValue LChain = L->getChain();
@@ -8170,8 +8448,25 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// Loads must share the same base address
BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
int64_t ByteOffsetFromBase = 0;
+
+ // For vector loads, the expected load combine pattern will have an
+ // ExtractElement for each index in the vector. While each of these
+ // ExtractElements will be accessing the same base address as determined
+ // by the load instruction, the actual bytes they interact with will differ
+ // due to different ExtractElement indices. To accurately determine the
+ // byte position of an ExtractElement, we offset the base load ptr with
+ // the index multiplied by the byte size of each element in the vector.
+ if (L->getMemoryVT().isVector()) {
+ unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
+ if (LoadWidthInBit % 8 != 0)
+ return SDValue();
+ unsigned ByteOffsetFromVector = P->VectorOffset * LoadWidthInBit / 8;
+ Ptr.addToOffset(ByteOffsetFromVector);
+ }
+
if (!Base)
Base = Ptr;
+
else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
return SDValue();
@@ -8187,6 +8482,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
Loads.insert(L);
}
+
assert(!Loads.empty() && "All the bytes of the value must be loaded from "
"memory, so there must be at least one load which produces the value");
assert(Base && "Base address of the accessed memory location must be set");
@@ -8210,8 +8506,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// Check if the bytes of the OR we are looking at match with either big or
// little endian value load
- Optional<bool> IsBigEndian = isBigEndian(
- makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
+ std::optional<bool> IsBigEndian = isBigEndian(
+ ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
if (!IsBigEndian)
return SDValue();
@@ -8246,7 +8542,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
// Check that a load of the wide type is both allowed and fast on the target
- bool Fast = false;
+ unsigned Fast = 0;
bool Allowed =
TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
*FirstLoad->getMemOperand(), &Fast);
@@ -8419,6 +8715,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
return RXOR;
+ // fold (a^b) -> (a|b) iff a and b share no bits.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
+ DAG.haveNoCommonBitsSet(N0, N1))
+ return DAG.getNode(ISD::OR, DL, VT, N0, N1);
+
// look for 'add-like' folds:
// XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
@@ -8510,8 +8811,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// fold (not (add X, -1)) -> (neg X)
if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
- return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
- N0.getOperand(0));
+ return DAG.getNegative(N0.getOperand(0), DL, VT);
}
// fold (xor (and x, y), y) -> (and (not x), y)
@@ -8573,6 +8873,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return R;
if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
return R;
+ if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
+ return R;
// Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
if (SDValue MM = unfoldMaskedMerge(N))
@@ -8672,13 +8974,9 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
return SDValue();
- // TODO: This is limited to early combining because it may reveal regressions
- // otherwise. But since we just checked a target hook to see if this is
- // desirable, that should have filtered out cases where this interferes
- // with some other pattern matching.
- if (!LegalTypes)
- if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
- return R;
+ // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
+ if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
+ return R;
// We want to pull some binops through shifts, so that we have (and (shift))
// instead of (shift (and)), likewise for add, or, xor, etc. This sort of
@@ -8697,11 +8995,6 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
break;
}
- // We require the RHS of the binop to be a constant and not opaque as well.
- ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
- if (!BinOpCst)
- return SDValue();
-
// FIXME: disable this unless the input to the binop is a shift by a constant
// or is copy/select. Enable this in other cases when figure out it's exactly
// profitable.
@@ -8719,16 +9012,17 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
if (IsCopyOrSelect && N->hasOneUse())
return SDValue();
- // Fold the constants, shifting the binop RHS by the shift amount.
+ // Attempt to fold the constants, shifting the binop RHS by the shift amount.
SDLoc DL(N);
EVT VT = N->getValueType(0);
- SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
- N->getOperand(1));
- assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
+ if (SDValue NewRHS = DAG.FoldConstantArithmetic(
+ N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
+ SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
+ N->getOperand(1));
+ return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
+ }
- SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
- N->getOperand(1));
- return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
+ return SDValue();
}
SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
@@ -8806,7 +9100,7 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
unsigned NextOp = N0.getOpcode();
// fold (rot* (rot* x, c2), c1)
- // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize)) % bitsize)
+ // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
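+ // e.g. for i32: (rotl (rotr x, 10), 3) -> (rotl x, (3 - 10 + 32) % 32) = (rotl x, 25)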
if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
@@ -8822,6 +9116,8 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
if (Norm1 && Norm2)
if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
+ CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
+ {CombinedShift, BitsizeC});
SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
@@ -9086,23 +9382,22 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return NewSHL;
// Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
- if (N0.getOpcode() == ISD::VSCALE)
- if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
- const APInt &C0 = N0.getConstantOperandAPInt(0);
- const APInt &C1 = NC1->getAPIntValue();
- return DAG.getVScale(SDLoc(N), VT, C0 << C1);
- }
+ if (N0.getOpcode() == ISD::VSCALE && N1C) {
+ const APInt &C0 = N0.getConstantOperandAPInt(0);
+ const APInt &C1 = N1C->getAPIntValue();
+ return DAG.getVScale(SDLoc(N), VT, C0 << C1);
+ }
// Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
APInt ShlVal;
- if (N0.getOpcode() == ISD::STEP_VECTOR)
- if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
- const APInt &C0 = N0.getConstantOperandAPInt(0);
- if (ShlVal.ult(C0.getBitWidth())) {
- APInt NewStep = C0 << ShlVal;
- return DAG.getStepVector(SDLoc(N), VT, NewStep);
- }
+ if (N0.getOpcode() == ISD::STEP_VECTOR &&
+ ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
+ const APInt &C0 = N0.getConstantOperandAPInt(0);
+ if (ShlVal.ult(C0.getBitWidth())) {
+ APInt NewStep = C0 << ShlVal;
+ return DAG.getStepVector(SDLoc(N), VT, NewStep);
}
+ }
return SDValue();
}
@@ -9142,6 +9437,28 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
EVT NarrowVT = LeftOp.getOperand(0).getValueType();
unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
+ // Return true if U may use the lower bits of its operands.
+ auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
+ if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
+ return true;
+ }
+ ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
+ if (!UShiftAmtSrc) {
+ return true;
+ }
+ unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
+ return UShiftAmt < NarrowVTSize;
+ };
+
+ // If the lower part of the MUL is also used and MUL_LOHI is supported,
+ // do not introduce the MULH in favor of MUL_LOHI.
+ unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
+ if (!ShiftOperand.hasOneUse() &&
+ TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
+ llvm::any_of(ShiftOperand->uses(), UserOfLowerBits)) {
+ return SDValue();
+ }
+
SDValue MulhRightOp;
if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
unsigned ActiveBits = IsSignExt
@@ -9649,16 +9966,23 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// However when after the source operand of SRL is optimized into AND, the SRL
// itself may not be optimized further. Look for it and add the BRCOND into
// the worklist.
+ //
+ // This also tends to happen for binary operations when SimplifyDemandedBits
+ // is involved.
+ //
+ // FIXME: This is unnecessary if we process the DAG in topological order,
+ // which we plan to do. This workaround can be removed once the DAG is
+ // processed in topological order.
if (N->hasOneUse()) {
SDNode *Use = *N->use_begin();
- if (Use->getOpcode() == ISD::BRCOND)
- AddToWorklist(Use);
- else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
- // Also look pass the truncate.
+
+ // Look past the truncate.
+ if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse())
Use = *Use->use_begin();
- if (Use->getOpcode() == ISD::BRCOND)
- AddToWorklist(Use);
- }
+
+ if (Use->getOpcode() == ISD::BRCOND || Use->getOpcode() == ISD::AND ||
+ Use->getOpcode() == ISD::OR || Use->getOpcode() == ISD::XOR)
+ AddToWorklist(Use);
}
// Try to transform this shift into a multiply-high if
@@ -9734,7 +10058,7 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
uint64_t PtrOff =
IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
- bool Fast = false;
+ unsigned Fast = 0;
if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
RHS->getAddressSpace(), NewAlign,
RHS->getMemOperand()->getFlags(), &Fast) &&
@@ -9817,8 +10141,8 @@ SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
// Given a ABS node, detect the following pattern:
// (ABS (SUB (EXTEND a), (EXTEND b))).
// Generates UABD/SABD instruction.
-static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+SDValue DAGCombiner::foldABSToABD(SDNode *N) {
+ EVT VT = N->getValueType(0);
SDValue AbsOp1 = N->getOperand(0);
SDValue Op0, Op1;
@@ -9831,10 +10155,14 @@ static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
unsigned Opc0 = Op0.getOpcode();
// Check if the operands of the sub are (zero|sign)-extended.
if (Opc0 != Op1.getOpcode() ||
- (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
+ (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) {
+ // fold (abs (sub nsw x, y)) -> abds(x, y)
+ if (AbsOp1->getFlags().hasNoSignedWrap() &&
+ TLI.isOperationLegalOrCustom(ISD::ABDS, VT))
+ return DAG.getNode(ISD::ABDS, SDLoc(N), VT, Op0, Op1);
return SDValue();
+ }
- EVT VT = N->getValueType(0);
EVT VT1 = Op0.getOperand(0).getValueType();
EVT VT2 = Op1.getOperand(0).getValueType();
unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
@@ -9871,9 +10199,24 @@ SDValue DAGCombiner::visitABS(SDNode *N) {
if (DAG.SignBitIsZero(N0))
return N0;
- if (SDValue ABD = combineABSToABD(N, DAG, TLI))
+ if (SDValue ABD = foldABSToABD(N))
return ABD;
+ // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
+ // iff zero_extend/truncate are free.
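+ // e.g. (abs (sext_inreg x, i8)) -> (zext (abs (trunc x to i8)))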
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
+ if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
+ TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
+ hasOperation(ISD::ABS, ExtVT)) {
+ SDLoc DL(N);
+ return DAG.getNode(
+ ISD::ZERO_EXTEND, DL, VT,
+ DAG.getNode(ISD::ABS, DL, ExtVT,
+ DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
+ }
+ }
+
return SDValue();
}
@@ -10027,14 +10370,11 @@ static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
}
-/// Generate Min/Max node
-static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
- SDValue RHS, SDValue True, SDValue False,
- ISD::CondCode CC, const TargetLowering &TLI,
- SelectionDAG &DAG) {
- if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
- return SDValue();
-
+static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
+ SDValue RHS, SDValue True, SDValue False,
+ ISD::CondCode CC,
+ const TargetLowering &TLI,
+ SelectionDAG &DAG) {
EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
switch (CC) {
case ISD::SETOLT:
@@ -10075,6 +10415,46 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
}
}
+/// Generate Min/Max node
+SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
+ SDValue RHS, SDValue True,
+ SDValue False, ISD::CondCode CC) {
+ if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
+ return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
+
+ // If we can't directly match this, try to see if we can pull an fneg out of
+ // the select.
+ SDValue NegTrue = TLI.getCheaperOrNeutralNegatedExpression(
+ True, DAG, LegalOperations, ForCodeSize);
+ if (!NegTrue)
+ return SDValue();
+
+ HandleSDNode NegTrueHandle(NegTrue);
+
+ // Try to unfold an fneg from the select if we are comparing the negated
+ // constant.
+ //
+ // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
+ //
+ // TODO: Handle fabs
+ if (LHS == NegTrue) {
+ // See whether the negated RHS also matches the false operand; if so, form
+ // the min/max and negate the result.
+ SDValue NegRHS = TLI.getCheaperOrNeutralNegatedExpression(
+ RHS, DAG, LegalOperations, ForCodeSize);
+ if (NegRHS) {
+ HandleSDNode NegRHSHandle(NegRHS);
+ if (NegRHS == False) {
+ SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
+ False, CC, TLI, DAG);
+ return DAG.getNode(ISD::FNEG, DL, VT, Combined);
+ }
+ }
+ }
+
+ return SDValue();
+}
+
/// If a (v)select has a condition value that is a sign-bit test, try to smear
/// the condition operand sign-bit across the value width and use it as a mask.
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
@@ -10112,6 +10492,25 @@ static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT,
+ const TargetLowering &TLI) {
+ if (!TLI.convertSelectOfConstantsToMath(VT))
+ return false;
+
+ if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
+ return true;
+ if (!TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
+ return true;
+
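+ // Even when SELECT_CC is legal, still prefer math for sign-bit tests
+ // (x < 0 or x > -1).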
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
+ return true;
+ if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
+ return true;
+
+ return false;
+}
+
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
SDValue Cond = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -10128,96 +10527,106 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
if (!C1 || !C2)
return SDValue();
+ if (CondVT != MVT::i1 || LegalOperations) {
+ // fold (select Cond, 0, 1) -> (xor Cond, 1)
+ // We can't do this reliably if integer based booleans have different contents
+ // to floating point based booleans. This is because we can't tell whether we
+ // have an integer-based boolean or a floating-point-based boolean unless we
+ // can find the SETCC that produced it and inspect its operands. This is
+ // fairly easy if C is the SETCC node, but it can potentially be
+ // undiscoverable (or not reasonably discoverable). For example, it could be
+ // in another basic block or it could require searching a complicated
+ // expression.
+ if (CondVT.isInteger() &&
+ TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ C1->isZero() && C2->isOne()) {
+ SDValue NotCond =
+ DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
+ if (VT.bitsEq(CondVT))
+ return NotCond;
+ return DAG.getZExtOrTrunc(NotCond, DL, VT);
+ }
+
+ return SDValue();
+ }
+
// Only do this before legalization to avoid conflicting with target-specific
// transforms in the other direction (create a select from a zext/sext). There
// is also a target-independent combine here in DAGCombiner in the other
// direction for (select Cond, -1, 0) when the condition is not i1.
- if (CondVT == MVT::i1 && !LegalOperations) {
- if (C1->isZero() && C2->isOne()) {
- // select Cond, 0, 1 --> zext (!Cond)
- SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
- if (VT != MVT::i1)
- NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
- return NotCond;
- }
- if (C1->isZero() && C2->isAllOnes()) {
- // select Cond, 0, -1 --> sext (!Cond)
- SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
- if (VT != MVT::i1)
- NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
- return NotCond;
- }
- if (C1->isOne() && C2->isZero()) {
- // select Cond, 1, 0 --> zext (Cond)
- if (VT != MVT::i1)
- Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
- return Cond;
- }
- if (C1->isAllOnes() && C2->isZero()) {
- // select Cond, -1, 0 --> sext (Cond)
- if (VT != MVT::i1)
- Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
- return Cond;
- }
-
- // Use a target hook because some targets may prefer to transform in the
- // other direction.
- if (TLI.convertSelectOfConstantsToMath(VT)) {
- // For any constants that differ by 1, we can transform the select into an
- // extend and add.
- const APInt &C1Val = C1->getAPIntValue();
- const APInt &C2Val = C2->getAPIntValue();
- if (C1Val - 1 == C2Val) {
- // select Cond, C1, C1-1 --> add (zext Cond), C1-1
- if (VT != MVT::i1)
- Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
- return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
- }
- if (C1Val + 1 == C2Val) {
- // select Cond, C1, C1+1 --> add (sext Cond), C1+1
- if (VT != MVT::i1)
- Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
- return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
- }
+ assert(CondVT == MVT::i1 && !LegalOperations);
- // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
- if (C1Val.isPowerOf2() && C2Val.isZero()) {
- if (VT != MVT::i1)
- Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
- SDValue ShAmtC =
- DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
- return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
- }
+ // select Cond, 1, 0 --> zext (Cond)
+ if (C1->isOne() && C2->isZero())
+ return DAG.getZExtOrTrunc(Cond, DL, VT);
- if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
- return V;
- }
+ // select Cond, -1, 0 --> sext (Cond)
+ if (C1->isAllOnes() && C2->isZero())
+ return DAG.getSExtOrTrunc(Cond, DL, VT);
+
+ // select Cond, 0, 1 --> zext (!Cond)
+ if (C1->isZero() && C2->isOne()) {
+ SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
+ NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
+ return NotCond;
+ }
+ // select Cond, 0, -1 --> sext (!Cond)
+ if (C1->isZero() && C2->isAllOnes()) {
+ SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
+ NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
+ return NotCond;
+ }
+
+ // Use a target hook because some targets may prefer to transform in the
+ // other direction.
+ if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
return SDValue();
+
+ // For any constants that differ by 1, we can transform the select into
+ // an extend and add.
+ const APInt &C1Val = C1->getAPIntValue();
+ const APInt &C2Val = C2->getAPIntValue();
+
+ // select Cond, C1, C1-1 --> add (zext Cond), C1-1
+ if (C1Val - 1 == C2Val) {
+ Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
+ return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
}
- // fold (select Cond, 0, 1) -> (xor Cond, 1)
- // We can't do this reliably if integer based booleans have different contents
- // to floating point based booleans. This is because we can't tell whether we
- // have an integer-based boolean or a floating-point-based boolean unless we
- // can find the SETCC that produced it and inspect its operands. This is
- // fairly easy if C is the SETCC node, but it can potentially be
- // undiscoverable (or not reasonably discoverable). For example, it could be
- // in another basic block or it could require searching a complicated
- // expression.
- if (CondVT.isInteger() &&
- TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
- TargetLowering::ZeroOrOneBooleanContent &&
- TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
- TargetLowering::ZeroOrOneBooleanContent &&
- C1->isZero() && C2->isOne()) {
- SDValue NotCond =
- DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
- if (VT.bitsEq(CondVT))
- return NotCond;
- return DAG.getZExtOrTrunc(NotCond, DL, VT);
+ // select Cond, C1, C1+1 --> add (sext Cond), C1+1
+ if (C1Val + 1 == C2Val) {
+ Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
+ return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
+ }
+
+ // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
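+ // e.g. select Cond, 16, 0 --> (zext Cond) << 4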
+ if (C1Val.isPowerOf2() && C2Val.isZero()) {
+ Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
+ SDValue ShAmtC =
+ DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
+ return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
+ }
+
+ // select Cond, -1, C --> or (sext Cond), C
+ if (C1->isAllOnes()) {
+ Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
+ return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
+ }
+
+ // select Cond, C, -1 --> or (sext (not Cond)), C
+ if (C2->isAllOnes()) {
+ SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
+ NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
+ return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
}
+ if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+ return V;
+
return SDValue();
}
@@ -10325,10 +10734,17 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (SDValue V = DAG.simplifySelect(N0, N1, N2))
return V;
- if (SDValue V = foldSelectOfConstants(N))
+ if (SDValue V = foldBoolSelectToLogic(N, DAG))
return V;
- if (SDValue V = foldBoolSelectToLogic(N, DAG))
+ // select (not Cond), N1, N2 -> select Cond, N2, N1
+ if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
+ SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
+ SelectOp->setFlags(Flags);
+ return SelectOp;
+ }
+
+ if (SDValue V = foldSelectOfConstants(N))
return V;
// If we can fold this based on the true/false value, do so.
@@ -10413,13 +10829,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
}
}
- // select (not Cond), N1, N2 -> select Cond, N2, N1
- if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
- SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
- SelectOp->setFlags(Flags);
- return SelectOp;
- }
-
// Fold selects based on a setcc into other things, such as min/max/abs.
if (N0.getOpcode() == ISD::SETCC) {
SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
@@ -10430,8 +10839,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
//
// This is OK if we don't care what happens if either operand is a NaN.
if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
- if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
- CC, TLI, DAG))
+ if (SDValue FMinMax =
+ combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
return FMinMax;
// Use 'unsigned add with overflow' to optimize an unsigned saturating add.
@@ -10542,23 +10951,37 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
}
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
- SelectionDAG &DAG) {
- if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
+ SelectionDAG &DAG, const SDLoc &DL) {
+ if (Index.getOpcode() != ISD::ADD)
return false;
// Only perform the transformation when existing operands can be reused.
if (IndexIsScaled)
return false;
- // For now we check only the LHS of the add.
- SDValue LHS = Index.getOperand(0);
- SDValue SplatVal = DAG.getSplatValue(LHS);
- if (!SplatVal || SplatVal.getValueType() != BasePtr.getValueType())
+ if (!isNullConstant(BasePtr) && !Index.hasOneUse())
return false;
- BasePtr = SplatVal;
- Index = Index.getOperand(1);
- return true;
+ EVT VT = BasePtr.getValueType();
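+ // If either operand of the index ADD is a splat of the pointer type, fold
+ // it into the scalar base pointer.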
+ if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
+ SplatVal && SplatVal.getValueType() == VT) {
+ if (isNullConstant(BasePtr))
+ BasePtr = SplatVal;
+ else
+ BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
+ Index = Index.getOperand(1);
+ return true;
+ }
+ if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
+ SplatVal && SplatVal.getValueType() == VT) {
+ if (isNullConstant(BasePtr))
+ BasePtr = SplatVal;
+ else
+ BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
+ Index = Index.getOperand(0);
+ return true;
+ }
+ return false;
}
// Fold sext/zext of index into index type.
@@ -10593,6 +11016,37 @@ bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
return false;
}
+SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
+ VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
+ SDValue Mask = MSC->getMask();
+ SDValue Chain = MSC->getChain();
+ SDValue Index = MSC->getIndex();
+ SDValue Scale = MSC->getScale();
+ SDValue StoreVal = MSC->getValue();
+ SDValue BasePtr = MSC->getBasePtr();
+ SDValue VL = MSC->getVectorLength();
+ ISD::MemIndexType IndexType = MSC->getIndexType();
+ SDLoc DL(N);
+
+ // Zap scatters with a zero mask.
+ if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
+ return Chain;
+
+ if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
+ SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
+ return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
+ DL, Ops, MSC->getMemOperand(), IndexType);
+ }
+
+ if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
+ SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
+ return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
+ DL, Ops, MSC->getMemOperand(), IndexType);
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue Mask = MSC->getMask();
@@ -10608,7 +11062,7 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return Chain;
- if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG)) {
+ if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
DL, Ops, MSC->getMemOperand(), IndexType,
@@ -10674,8 +11128,9 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
// If this is a TRUNC followed by a masked store, fold this into a masked
// truncating store. We can do this even if this is already a masked
// truncstore.
+ // TODO: Try to combine this into a masked compress store if possible.
if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
- MST->isUnindexed() &&
+ MST->isUnindexed() && !MST->isCompressingStore() &&
TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
MST->getMemoryVT(), LegalOperations)) {
auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
@@ -10689,6 +11144,34 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
+ VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
+ SDValue Mask = MGT->getMask();
+ SDValue Chain = MGT->getChain();
+ SDValue Index = MGT->getIndex();
+ SDValue Scale = MGT->getScale();
+ SDValue BasePtr = MGT->getBasePtr();
+ SDValue VL = MGT->getVectorLength();
+ ISD::MemIndexType IndexType = MGT->getIndexType();
+ SDLoc DL(N);
+
+ if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
+ SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
+ return DAG.getGatherVP(
+ DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
+ Ops, MGT->getMemOperand(), IndexType);
+ }
+
+ if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
+ SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
+ return DAG.getGatherVP(
+ DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
+ Ops, MGT->getMemOperand(), IndexType);
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitMGATHER(SDNode *N) {
MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
SDValue Mask = MGT->getMask();
@@ -10704,7 +11187,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return CombineTo(N, PassThru, MGT->getChain());
- if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG)) {
+ if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
return DAG.getMaskedGather(
DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
@@ -10756,7 +11239,7 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
SDValue N2 = N->getOperand(2);
EVT VT = N->getValueType(0);
if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
- !TLI.convertSelectOfConstantsToMath(VT) ||
+ !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
!ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
!ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
return SDValue();
@@ -10869,8 +11352,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
// NaN.
//
if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
- if (SDValue FMinMax =
- combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
+ if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
return FMinMax;
}
@@ -11011,8 +11493,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
/*AllowUndefs*/ true)) {
- OpRHS = DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(0, DL, VT), OpRHS);
+ OpRHS = DAG.getNegative(OpRHS, DL, VT);
return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
}
@@ -11083,6 +11564,11 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
if (N2 == N3)
return N2;
+ // select_cc bool, 0, x, y, seteq -> select bool, y, x
+ if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
+ isNullConstant(N1))
+ return DAG.getSelect(SDLoc(N), N2.getValueType(), N0, N3, N2);
+
// Determine if the condition we're dealing with is constant
if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
CC, SDLoc(N), false)) {
@@ -11297,9 +11783,11 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
SDLoc DL(N);
assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
- Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
- Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
- && "Expected EXTEND dag node in input!");
+ Opcode == ISD::ANY_EXTEND ||
+ Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
+ Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
+ "Expected EXTEND dag node in input!");
// fold (sext c1) -> c1
// fold (zext c1) -> c1
@@ -11347,15 +11835,13 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
SmallVector<SDValue, 8> Elts;
unsigned NumElts = VT.getVectorNumElements();
- // For zero-extensions, UNDEF elements still guarantee to have the upper
- // bits set to zero.
- bool IsZext =
- Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
-
for (unsigned i = 0; i != NumElts; ++i) {
SDValue Op = N0.getOperand(i);
if (Op.isUndef()) {
- Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
+ if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
+ Elts.push_back(DAG.getUNDEF(SVT));
+ else
+ Elts.push_back(DAG.getConstant(0, DL, SVT));
continue;
}
@@ -11926,7 +12412,7 @@ SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
return SCC;
- if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
+ if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
EVT SetCCVT = getSetCCResultType(N00VT);
// Don't do this transform for i1 because there's a select transform
// that would reverse it.
@@ -11947,6 +12433,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
+ return FoldedVOp;
+
// sext(undef) = 0 because the top bit will all be the same.
if (N0.isUndef())
return DAG.getConstant(0, DL, VT);
@@ -11959,6 +12449,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
+ // fold (sext (sext_inreg x)) -> (sext (trunc x))
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ SDValue N00 = N0.getOperand(0);
+ EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
+ if (N00.getOpcode() == ISD::TRUNCATE && (!LegalOperations || TLI.isTypeLegal(ExtVT))) {
+ SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00.getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
+ }
+ }
+
if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (sext (truncate (load x))) -> (sext (smaller load x))
// fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
@@ -12095,7 +12595,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
- return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
+ return DAG.getNegative(Zext, DL, VT);
}
// Eliminate this sign extend by doing a decrement in the destination type:
// sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
@@ -12192,10 +12692,41 @@ static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
}
+// If we have (zext (abs X)) where X is a type that will be promoted by type
+// legalization, convert to (abs (sext X)). But don't extend past a legal type.
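+// e.g. on a target that promotes i16 to i32:
+// (zext (abs i16 X) to i64) -> (zext (abs (sext X to i32)) to i64)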
+static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
+ assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
+
+ EVT VT = Extend->getValueType(0);
+ if (VT.isVector())
+ return SDValue();
+
+ SDValue Abs = Extend->getOperand(0);
+ if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
+ return SDValue();
+
+ EVT AbsVT = Abs.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
+ TargetLowering::TypePromoteInteger)
+ return SDValue();
+
+ EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
+
+ SDValue SExt =
+ DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
+ SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
+ return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
+}
+
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
+ return FoldedVOp;
+
// zext(undef) = 0
if (N0.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
@@ -12452,6 +12983,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (SDValue NewCtPop = widenCtPop(N, DAG))
return NewCtPop;
+ if (SDValue V = widenAbs(N, DAG))
+ return V;
+
if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
return Res;
@@ -12878,8 +13412,8 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
auto AdjustBigEndianShift = [&](unsigned ShAmt) {
unsigned LVTStoreBits =
- LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
- unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
+ LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
+ unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
return LVTStoreBits - EVTStoreBits - ShAmt;
};
@@ -13120,16 +13654,75 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
}
+ // Fold (iM_signext_inreg
+ // (extract_subvector (zext|anyext|sext iN_v to _) _)
+ // from iN)
+ // -> (extract_subvector (signext iN_v to iM))
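+ // e.g. (sext_inreg (extract_subvector (zext v8i8 V to v8i32), idx) from i8)
+ // -> (extract_subvector (sext v8i8 V to v8i32), idx)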
+ if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
+ ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
+ SDValue InnerExt = N0.getOperand(0);
+ EVT InnerExtVT = InnerExt->getValueType(0);
+ SDValue Extendee = InnerExt->getOperand(0);
+
+ if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
+ SDValue SignExtExtendee =
+ DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), InnerExtVT, Extendee);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, SignExtExtendee,
+ N0.getOperand(1));
+ }
+ }
+
return SDValue();
}
+static SDValue
+foldExtendVectorInregToExtendOfSubvector(SDNode *N, const TargetLowering &TLI,
+ SelectionDAG &DAG,
+ bool LegalOperations) {
+ unsigned InregOpcode = N->getOpcode();
+ unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
+
+ SDValue Src = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
+ Src.getValueType().getVectorElementType(),
+ VT.getVectorElementCount());
+
+ assert((InregOpcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
+ InregOpcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ InregOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
+ "Expected EXTEND_VECTOR_INREG dag node in input!");
+
+ // Profitability check: our operand must be a one-use CONCAT_VECTORS.
+ // FIXME: one-use check may be overly restrictive
+ if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
+ return SDValue();
+
+ // Profitability check: we must be extending exactly one of its operands.
+ // FIXME: this is probably overly restrictive.
+ Src = Src.getOperand(0);
+ if (Src.getValueType() != SrcVT)
+ return SDValue();
+
+ if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
+ return SDValue();
+
+ return DAG.getNode(Opcode, SDLoc(N), VT, Src);
+}
+
SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
- if (N0.isUndef())
- return DAG.getConstant(0, SDLoc(N), VT);
+ if (N0.isUndef()) {
+ // aext_vector_inreg(undef) = undef because the top bits are undefined.
+ // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
+ return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
+ ? DAG.getUNDEF(VT)
+ : DAG.getConstant(0, SDLoc(N), VT);
+ }
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
@@ -13137,6 +13730,10 @@ SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
return SDValue(N, 0);
+ if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, TLI, DAG,
+ LegalOperations))
+ return R;
+
return SDValue();
}
@@ -13394,18 +13991,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
- // See if we can simplify the input to this truncate through knowledge that
- // only the low bits are being used.
- // For example "trunc (or (shl x, 8), y)" // -> trunc y
- // Currently we only perform this optimization on scalars because vectors
- // may have different active low bits.
- if (!VT.isVector()) {
- APInt Mask =
- APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
- if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
- }
-
// fold (truncate (extract_subvector(ext x))) ->
// (extract_subvector x)
// TODO: This can be generalized to cover cases where the truncate and extract
@@ -13510,7 +14095,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
LD1->getAddressSpace() != LD2->getAddressSpace())
return SDValue();
- bool LD1Fast = false;
+ unsigned LD1Fast = 0;
EVT LD1VT = LD1->getValueType(0);
unsigned LD1Bytes = LD1VT.getStoreSize();
if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
@@ -13840,15 +14425,72 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
return N0;
- // Fold freeze(bitcast(x)) -> bitcast(freeze(x)).
- // TODO: Replace with pushFreezeToPreventPoisonFromPropagating fold.
- if (N0.getOpcode() == ISD::BITCAST)
- return DAG.getBitcast(N->getValueType(0),
- DAG.getNode(ISD::FREEZE, SDLoc(N0),
- N0.getOperand(0).getValueType(),
- N0.getOperand(0)));
+ // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
+ // Try to push freeze through instructions that propagate but don't produce
+ // poison as far as possible. If the operand of the freeze meets three
+ // conditions: 1) it has one use, 2) it does not produce poison, and 3) all
+ // but one of its operands are guaranteed non-poison (or it is a BUILD_VECTOR
+ // or similar), then push the freeze through to the operands that are not
+ // guaranteed non-poison.
+ // NOTE: we will strip poison-generating flags, so ignore them here.
+ if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
+ /*ConsiderFlags*/ false) ||
+ N0->getNumValues() != 1 || !N0->hasOneUse())
+ return SDValue();
- return SDValue();
+ bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR;
+
+ SmallSetVector<SDValue, 8> MaybePoisonOperands;
+ for (SDValue Op : N0->ops()) {
+ if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
+ /*Depth*/ 1))
+ continue;
+ bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
+ bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op);
+ if (!HadMaybePoisonOperands)
+ continue;
+ if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
+ // Multiple maybe-poison ops when not allowed - bail out.
+ return SDValue();
+ }
+ }
+ // NOTE: even if no maybe-poison operands are found, the op itself may still
+ // create undef or poison through its poison-generating flags.
+ // So not finding any maybe-poison operands is fine.
+
+ for (SDValue MaybePoisonOperand : MaybePoisonOperands) {
+ // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
+ if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
+ continue;
+ // First, freeze each offending operand.
+ SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
+ // Then, change all other uses of unfrozen operand to use frozen operand.
+ DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
+ if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
+ FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
+ // But, that also updated the use in the freeze we just created, thus
+ // creating a cycle in a DAG. Let's undo that by mutating the freeze.
+ DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
+ MaybePoisonOperand);
+ }
+ }
+
+ // The whole node may have been updated, so the value we were holding
+ // may no longer be valid. Re-fetch the operand we're `freeze`ing.
+ N0 = N->getOperand(0);
+
+ // Finally, recreate the node; its operands were updated to use frozen
+ // operands, so we just need to use its "original" operands.
+ SmallVector<SDValue> Ops(N0->op_begin(), N0->op_end());
+ // Special-handle ISD::UNDEF, each single one of them can be its own thing.
+ for (SDValue &Op : Ops) {
+ if (Op.getOpcode() == ISD::UNDEF)
+ Op = DAG.getFreeze(Op);
+ }
+ // NOTE: this strips poison generating flags.
+ SDValue R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
+ assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
+ "Can't create node that may be undef/poison!");
+ return R;
}
/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
@@ -14012,26 +14654,37 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
// fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
+ // This also works with nested fma instructions:
+ // fadd (fma A, B, (fma C, D, (fmul E, F))), G -->
+ // fma A, B, (fma C, D, (fma E, F, G))
+ // fadd G, (fma A, B, (fma C, D, (fmul E, F))) -->
+ // fma A, B, (fma C, D, (fma E, F, G)).
// This requires reassociation because it changes the order of operations.
- SDValue FMA, E;
- if (CanReassociate && isFusedOp(N0) &&
- N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
- N0.getOperand(2).hasOneUse()) {
- FMA = N0;
- E = N1;
- } else if (CanReassociate && isFusedOp(N1) &&
- N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
- N1.getOperand(2).hasOneUse()) {
- FMA = N1;
- E = N0;
- }
- if (FMA && E) {
- SDValue A = FMA.getOperand(0);
- SDValue B = FMA.getOperand(1);
- SDValue C = FMA.getOperand(2).getOperand(0);
- SDValue D = FMA.getOperand(2).getOperand(1);
- SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
- return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
+ if (CanReassociate) {
+ SDValue FMA, E;
+ if (isFusedOp(N0) && N0.hasOneUse()) {
+ FMA = N0;
+ E = N1;
+ } else if (isFusedOp(N1) && N1.hasOneUse()) {
+ FMA = N1;
+ E = N0;
+ }
+
+ SDValue TmpFMA = FMA;
+ while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
+ SDValue FMul = TmpFMA->getOperand(2);
+ if (FMul.getOpcode() == ISD::FMUL && FMul.hasOneUse()) {
+ SDValue C = FMul.getOperand(0);
+ SDValue D = FMul.getOperand(1);
+ SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
+ DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
+ // Replacing the inner FMul could cause the outer FMA to be simplified
+ // away.
+ return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue() : FMA;
+ }
+
+ TmpFMA = TmpFMA->getOperand(2);
+ }
}
// Look through FP_EXTEND nodes to do more combining.
@@ -14331,8 +14984,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
};
- auto isContractableAndReassociableFMUL = [isContractableFMUL,
- isReassociable](SDValue N) {
+ auto isContractableAndReassociableFMUL = [&isContractableFMUL,
+ &isReassociable](SDValue N) {
return isContractableFMUL(N) && isReassociable(N.getNode());
};
@@ -14567,8 +15220,8 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
- bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
+ SDNode *N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
+ SDNode *N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
@@ -14665,8 +15318,10 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// of rounding steps.
if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
if (N0.getOpcode() == ISD::FMUL) {
- bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
- bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
+ SDNode *CFP00 =
+ DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ SDNode *CFP01 =
+ DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
@@ -14686,8 +15341,10 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
if (N1.getOpcode() == ISD::FMUL) {
- bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
- bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
+ SDNode *CFP10 =
+ DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ SDNode *CFP11 =
+ DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
@@ -14707,7 +15364,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
if (N0.getOpcode() == ISD::FADD) {
- bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ SDNode *CFP00 =
+ DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul x, 3.0)
if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
(N0.getOperand(0) == N1)) {
@@ -14717,7 +15375,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
if (N1.getOpcode() == ISD::FADD) {
- bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ SDNode *CFP10 =
+ DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
N1.getOperand(0) == N0) {
@@ -14930,12 +15589,14 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
TargetLowering::NegatibleCost::Expensive;
SDValue NegN0 =
TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
- SDValue NegN1 =
- TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
- if (NegN0 && NegN1 &&
- (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
- CostN1 == TargetLowering::NegatibleCost::Cheaper))
- return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
+ if (NegN0) {
+ HandleSDNode NegN0Handle(NegN0);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
+ if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
+ CostN1 == TargetLowering::NegatibleCost::Cheaper))
+ return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
+ }
// fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
// fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
@@ -14964,7 +15625,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
case ISD::SETLT:
case ISD::SETLE:
std::swap(TrueOpnd, FalseOpnd);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETOGT:
case ISD::SETUGT:
case ISD::SETOGE:
@@ -15021,12 +15682,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
TargetLowering::NegatibleCost::Expensive;
SDValue NegN0 =
TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
- SDValue NegN1 =
- TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
- if (NegN0 && NegN1 &&
- (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
- CostN1 == TargetLowering::NegatibleCost::Cheaper))
- return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
+ if (NegN0) {
+ HandleSDNode NegN0Handle(NegN0);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
+ if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
+ CostN1 == TargetLowering::NegatibleCost::Cheaper))
+ return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
+ }
// FIXME: use fast math flags instead of Options.UnsafeFPMath
if (Options.UnsafeFPMath) {
@@ -15324,12 +15987,14 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
TargetLowering::NegatibleCost::Expensive;
SDValue NegN0 =
TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
- SDValue NegN1 =
- TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
- if (NegN0 && NegN1 &&
- (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
- CostN1 == TargetLowering::NegatibleCost::Cheaper))
- return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
+ if (NegN0) {
+ HandleSDNode NegN0Handle(NegN0);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
+ if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
+ CostN1 == TargetLowering::NegatibleCost::Cheaper))
+ return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
+ }
return SDValue();
}
@@ -15396,11 +16061,7 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
if (N1Op0VT == MVT::f128)
return false;
- // Avoid mismatched vector operand types, for better instruction selection.
- if (N1Op0VT.isVector())
- return false;
-
- return true;
+ return !N1Op0VT.isVector() || EnableVectorFCopySignExtendRound;
}
return false;
}
@@ -15722,12 +16383,12 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (fp_round c1fp) -> c1fp
- if (N0CFP)
- return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
+ if (SDValue C =
+ DAG.FoldConstantArithmetic(ISD::FP_ROUND, SDLoc(N), VT, {N0, N1}))
+ return C;
// fold (fp_round (fp_extend x)) -> x
if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
@@ -15755,8 +16416,9 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
// Also, this is a value preserving truncation iff both fp_round's are.
if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
SDLoc DL(N);
- return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
- DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
+ return DAG.getNode(
+ ISD::FP_ROUND, DL, VT, N0.getOperand(0),
+ DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
}
}
@@ -15779,6 +16441,10 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
+ return FoldedVOp;
+
// If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
if (N->hasOneUse() &&
N->use_begin()->getOpcode() == ISD::FP_ROUND)
@@ -15814,11 +16480,11 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
CombineTo(N, ExtLoad);
- CombineTo(N0.getNode(),
- DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
- N0.getValueType(), ExtLoad,
- DAG.getIntPtrConstant(1, SDLoc(N0))),
- ExtLoad.getValue(1));
+ CombineTo(
+ N0.getNode(),
+ DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
+ DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
+ ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -16573,7 +17239,6 @@ static inline ElementCount numVectorEltsOrZero(EVT T) {
}
bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
- Val = ST->getValue();
EVT STType = Val.getValueType();
EVT STMemType = ST->getMemoryVT();
if (STType == STMemType)
@@ -16629,7 +17294,7 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
SDValue Chain = LD->getOperand(0);
StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
// TODO: Relax this restriction for unordered atomics (see D66309)
- if (!ST || !ST->isSimple())
+ if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
return SDValue();
EVT LDType = LD->getValueType(0);
@@ -16665,9 +17330,10 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
// significant bit in the loaded value maps to the least significant bit in
// the stored value). With Offset=n (for n > 0) the loaded value starts at the
// n:th least significant byte of the stored value.
+ int64_t OrigOffset = Offset;
if (DAG.getDataLayout().isBigEndian())
- Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
- (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
+ Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
+ (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
8 -
Offset;
@@ -16679,8 +17345,8 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
if (LdStScalable)
STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
else
- STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
- StMemSize.getFixedSize());
+ STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
+ StMemSize.getFixedValue());
auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
if (LD->isIndexed()) {
@@ -16709,18 +17375,30 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
// Mask to size of LDMemType
auto Mask =
DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
- StMemSize.getFixedSize()),
+ StMemSize.getFixedValue()),
SDLoc(ST), STType);
auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
return ReplaceLd(LD, Val, Chain);
}
}
+ // Handle some big-endian cases that would be handled with Offset 0 on
+ // little-endian targets.
+ SDValue Val = ST->getValue();
+ if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
+ if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
+ !LDType.isVector() && isTypeLegal(STType) &&
+ TLI.isOperationLegal(ISD::SRL, STType)) {
+ Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
+ DAG.getConstant(Offset * 8, SDLoc(LD), STType));
+ Offset = 0;
+ }
+ }
+
// TODO: Deal with nonzero offset.
if (LD->getBasePtr().isUndef() || Offset != 0)
return SDValue();
// Model necessary truncations / extensions.
- SDValue Val;
// Truncate Value To Stored Memory Size.
do {
if (!getTruncatedStoreValue(ST, Val))
@@ -17160,7 +17838,7 @@ struct LoadedSlice {
// Check if it will be merged with the load.
// 1. Check the alignment / fast memory access constraint.
- bool IsFast = false;
+ unsigned IsFast = 0;
if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
Origin->getAddressSpace(), getAlign(),
Origin->getMemOperand()->getFlags(), &IsFast) ||
@@ -17663,7 +18341,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (DAG.getDataLayout().isBigEndian())
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
- bool IsFast = false;
+ unsigned IsFast = 0;
Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
LD->getAddressSpace(), NewAlign,
@@ -17722,8 +18400,8 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
if (VTSize.isScalable())
return SDValue();
- bool FastLD = false, FastST = false;
- EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
+ unsigned FastLD = 0, FastST = 0;
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
!TLI.isOperationLegal(ISD::STORE, IntVT) ||
!TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
@@ -17866,7 +18544,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
unsigned SizeInBits = NumStores * ElementSizeBits;
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
- Optional<MachineMemOperand::Flags> Flags;
+ std::optional<MachineMemOperand::Flags> Flags;
AAMDNodes AAInfo;
for (unsigned I = 0; I != NumStores; ++I) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
@@ -17941,6 +18619,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
// We may need to add a bitcast here to get types to line up.
if (MemVTScalarTy != Val.getValueType().getScalarType()) {
Val = DAG.getBitcast(MemVT, Val);
+ } else if (MemVT.isVector() &&
+ Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
} else {
unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
: ISD::EXTRACT_VECTOR_ELT;
@@ -18331,7 +19012,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(
// Find a legal type for the constant store.
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
- bool IsFast = false;
+ unsigned IsFast = 0;
// Break early when size is too large to be legal.
if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
@@ -18441,7 +19122,7 @@ bool DAGCombiner::tryStoreMergeOfExtracts(
// Find a legal type for the vector store.
unsigned Elts = (i + 1) * NumMemElts;
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
- bool IsFast = false;
+ unsigned IsFast = 0;
// Break early when size is too large to be legal.
if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
@@ -18594,8 +19275,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
break;
- bool IsFastSt = false;
- bool IsFastLd = false;
+ unsigned IsFastSt = 0;
+ unsigned IsFastLd = 0;
// Don't try vector types if we need a rotate. We may still fail the
// legality checks for the integer type, but we can't handle the rotate
// case with vectors.
@@ -19050,16 +19731,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
ST->getMemoryVT().getScalarSizeInBits());
- // See if we can simplify the input to this truncstore with knowledge that
- // only the low bits are being used. For example:
- // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
+ // See if we can simplify the operation with SimplifyDemandedBits, which
+ // only works if the value has a single use.
AddToWorklist(Value.getNode());
- if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
- return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
- ST->getMemOperand());
-
- // Otherwise, see if we can simplify the operation with
- // SimplifyDemandedBits, which only works if the value has a single use.
if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
// Re-visit the store if anything changed and the store hasn't been merged
// with another node (N is deleted) SimplifyDemandedBits will add Value's
@@ -19069,6 +19743,28 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
AddToWorklist(N);
return SDValue(N, 0);
}
+
+ // Otherwise, see if we can simplify the input to this truncstore with
+ // knowledge that only the low bits are being used. For example:
+ // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
+ if (SDValue Shorter =
+ TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
+ return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
+ ST->getMemOperand());
+
+ // If we're storing a truncated constant, see if we can simplify it.
+ // TODO: Move this to targetShrinkDemandedConstant?
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
+ if (!Cst->isOpaque()) {
+ const APInt &CValue = Cst->getAPIntValue();
+ APInt NewVal = CValue & TruncDemandedBits;
+ if (NewVal != CValue) {
+ SDValue Shorter =
+ DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
+ return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
+ ST->getMemoryVT(), ST->getMemOperand());
+ }
+ }
}
// If this is a load followed by a store to the same location, then the store
@@ -19209,7 +19905,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
// If we store purely within object bounds just before its lifetime ends,
// we can remove the store.
if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
- StoreSize.getFixedSize() * 8)) {
+ StoreSize.getFixedValue() * 8)) {
LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
dbgs() << "\nwithin LIFETIME_END of : ";
LifetimeEndBase.dump(); dbgs() << "\n");
@@ -19329,94 +20025,113 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
return St1;
}
-/// Convert a disguised subvector insertion into a shuffle:
-SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
- assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
- "Expected extract_vector_elt");
- SDValue InsertVal = N->getOperand(1);
- SDValue Vec = N->getOperand(0);
+// Merge an insertion into an existing shuffle:
+// (insert_vector_elt (vector_shuffle X, Y, Mask),
+//                    (extract_vector_elt X, N), InsIndex)
+// --> (vector_shuffle X, Y, NewMask)
+// and variations where shuffle operands may be CONCAT_VECTORS.
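+// Editor's illustration (not part of the original change): with v4i32 X and Y,
+// Mask = <0,5,2,7>, Elt = (extract_vector_elt X, 3) and InsIndex = 1, the
+// insertion is absorbed by rewriting the mask to <0,3,2,7>.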
+static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
+ SmallVectorImpl<int> &NewMask, SDValue Elt,
+ unsigned InsIndex) {
+ if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Elt.getOperand(1)))
+ return false;
- // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
- // InsIndex)
- // --> (vector_shuffle X, Y) and variations where shuffle operands may be
- // CONCAT_VECTORS.
- if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
- InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- isa<ConstantSDNode>(InsertVal.getOperand(1))) {
- ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
- ArrayRef<int> Mask = SVN->getMask();
+ // Vec's operand 0 is using indices from 0 to N-1 and
+ // operand 1 from N to 2N - 1, where N is the number of
+ // elements in the vectors.
+ SDValue InsertVal0 = Elt.getOperand(0);
+ int ElementOffset = -1;
+
+ // We explore the inputs of the shuffle in order to see if we find the
+ // source of the extract_vector_elt. If so, we can use it to modify the
+ // shuffle rather than perform an insert_vector_elt.
+ SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
+ ArgWorkList.emplace_back(Mask.size(), Y);
+ ArgWorkList.emplace_back(0, X);
+
+ while (!ArgWorkList.empty()) {
+ int ArgOffset;
+ SDValue ArgVal;
+ std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
+
+ if (ArgVal == InsertVal0) {
+ ElementOffset = ArgOffset;
+ break;
+ }
- SDValue X = Vec.getOperand(0);
- SDValue Y = Vec.getOperand(1);
-
- // Vec's operand 0 is using indices from 0 to N-1 and
- // operand 1 from N to 2N - 1, where N is the number of
- // elements in the vectors.
- SDValue InsertVal0 = InsertVal.getOperand(0);
- int ElementOffset = -1;
-
- // We explore the inputs of the shuffle in order to see if we find the
- // source of the extract_vector_elt. If so, we can use it to modify the
- // shuffle rather than perform an insert_vector_elt.
- SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
- ArgWorkList.emplace_back(Mask.size(), Y);
- ArgWorkList.emplace_back(0, X);
-
- while (!ArgWorkList.empty()) {
- int ArgOffset;
- SDValue ArgVal;
- std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
-
- if (ArgVal == InsertVal0) {
- ElementOffset = ArgOffset;
- break;
+ // Peek through concat_vector.
+ if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
+ int CurrentArgOffset =
+ ArgOffset + ArgVal.getValueType().getVectorNumElements();
+ int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
+ for (SDValue Op : reverse(ArgVal->ops())) {
+ CurrentArgOffset -= Step;
+ ArgWorkList.emplace_back(CurrentArgOffset, Op);
}
- // Peek through concat_vector.
- if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
- int CurrentArgOffset =
- ArgOffset + ArgVal.getValueType().getVectorNumElements();
- int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
- for (SDValue Op : reverse(ArgVal->ops())) {
- CurrentArgOffset -= Step;
- ArgWorkList.emplace_back(CurrentArgOffset, Op);
- }
-
- // Make sure we went through all the elements and did not screw up index
- // computation.
- assert(CurrentArgOffset == ArgOffset);
- }
+ // Make sure we went through all the elements and did not screw up index
+ // computation.
+ assert(CurrentArgOffset == ArgOffset);
}
+ }
- // If we failed to find a match, see if we can replace an UNDEF shuffle
- // operand.
- if (ElementOffset == -1 && Y.isUndef() &&
- InsertVal0.getValueType() == Y.getValueType()) {
- ElementOffset = Mask.size();
- Y = InsertVal0;
- }
+ // If we failed to find a match, see if we can replace an UNDEF shuffle
+ // operand.
+ if (ElementOffset == -1) {
+ if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
+ return false;
+ ElementOffset = Mask.size();
+ Y = InsertVal0;
+ }
- if (ElementOffset != -1) {
- SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
+ NewMask.assign(Mask.begin(), Mask.end());
+ NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
+ assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
+ "NewMask[InsIndex] is out of bound");
+ return true;
+}
- auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
- NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
- assert(NewMask[InsIndex] <
- (int)(2 * Vec.getValueType().getVectorNumElements()) &&
- NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
+// Merge an insertion into an existing shuffle:
+// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
+// InsIndex)
+// --> (vector_shuffle X, Y) and variations where shuffle operands may be
+// CONCAT_VECTORS.
+SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
+ assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
+         "Expected insert_vector_elt");
+ SDValue InsertVal = N->getOperand(1);
+ SDValue Vec = N->getOperand(0);
- SDValue LegalShuffle =
- TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
- Y, NewMask, DAG);
- if (LegalShuffle)
- return LegalShuffle;
- }
+ auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
+ if (!SVN || !Vec.hasOneUse())
+ return SDValue();
+
+ ArrayRef<int> Mask = SVN->getMask();
+ SDValue X = Vec.getOperand(0);
+ SDValue Y = Vec.getOperand(1);
+
+ SmallVector<int, 16> NewMask(Mask);
+ if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
+ SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
+ Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
+ if (LegalShuffle)
+ return LegalShuffle;
}
- // insert_vector_elt V, (bitcast X from vector type), IdxC -->
- // bitcast(shuffle (bitcast V), (extended X), Mask)
- // Note: We do not use an insert_subvector node because that requires a
- // legal subvector type.
+ return SDValue();
+}
+
+// Convert a disguised subvector insertion into a shuffle:
+// insert_vector_elt V, (bitcast X from vector type), IdxC -->
+// bitcast(shuffle (bitcast V), (extended X), Mask)
+// Note: We do not use an insert_subvector node because that requires a
+// legal subvector type.
+SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
+ assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
+         "Expected insert_vector_elt");
+ SDValue InsertVal = N->getOperand(1);
+
if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
!InsertVal.getOperand(0).getValueType().isVector())
return SDValue();
@@ -19491,13 +20206,8 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
if (!IndexC) {
// If this is variable insert to undef vector, it might be better to splat:
// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
- if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
- if (VT.isScalableVector())
- return DAG.getSplatVector(VT, DL, InVal);
-
- SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
- return DAG.getBuildVector(VT, DL, Ops);
- }
+ if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
+ return DAG.getSplat(VT, DL, InVal);
return SDValue();
}
@@ -19509,9 +20219,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
// We must know which element is being inserted for folds below here.
unsigned Elt = IndexC->getZExtValue();
- if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
- return Shuf;
-
// Handle <1 x ???> vector insertion special cases.
if (NumElts == 1) {
// insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
@@ -19541,6 +20248,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
}
}
+ if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
+ return Shuf;
+
+ if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
+ return Shuf;
+
// Attempt to convert an insert_vector_elt chain into a legal build_vector.
if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
// vXi1 vector - we don't need to recurse.
@@ -19610,9 +20323,52 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
continue;
}
+ // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
+      // update the shuffle mask (and second operand if we started with a unary
+      // shuffle) and create a new legal shuffle.
+ if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
+ auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
+ SDValue LHS = SVN->getOperand(0);
+ SDValue RHS = SVN->getOperand(1);
+ SmallVector<int, 16> Mask(SVN->getMask());
+ bool Merged = true;
+ for (auto I : enumerate(Ops)) {
+ SDValue &Op = I.value();
+ if (Op) {
+ SmallVector<int, 16> NewMask;
+ if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
+ Merged = false;
+ break;
+ }
+ Mask = std::move(NewMask);
+ }
+ }
+ if (Merged)
+ if (SDValue NewShuffle =
+ TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
+ return NewShuffle;
+ }
+
// Failed to find a match in the chain - bail.
break;
}
+
+ // See if we can fill in the missing constant elements as zeros.
+ // TODO: Should we do this for any constant?
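+    // Editor's illustration (not part of the original change): if the insert
+    // chain only provided Ops = {x, ?, y, ?} but the remaining lanes of InVec
+    // are known to be zero, the node can still become build_vector <x, 0, y, 0>.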
+ APInt DemandedZeroElts = APInt::getZero(NumElts);
+ for (unsigned I = 0; I != NumElts; ++I)
+ if (!Ops[I])
+ DemandedZeroElts.setBit(I);
+
+ if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
+ SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
+ : DAG.getConstantFP(0, DL, MaxEltVT);
+ for (unsigned I = 0; I != NumElts; ++I)
+ if (!Ops[I])
+ Ops[I] = Zero;
+
+ return CanonicalizeBuildVector(Ops);
+ }
}
return SDValue();
@@ -19653,7 +20409,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
}
- bool IsFast = false;
+ unsigned IsFast = 0;
if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
OriginalLoad->getAddressSpace(), Alignment,
OriginalLoad->getMemOperand()->getFlags(),
@@ -19731,6 +20487,168 @@ static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
return SDValue();
}
+// Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
+// recursively analyse all of its users and try to model them as
+// bit sequence extractions. If all of them agree on the new, narrower element
+// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
+// new element type, do so now.
+// This is mainly useful for recovering from legalization that scalarized
+// the vector as wide elements; this combine tries to rebuild it with
+// narrower elements.
+//
+// Some more nodes could be modelled if that helps cover interesting patterns.
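+// Editor's illustration (not part of the original change): for
+// (i32 extract_vector_elt v2i32 %v, 0) whose only uses are (trunc ... to i16)
+// and (trunc (srl ..., 16) to i16), both feeding BUILD_VECTORs, %v can be
+// bitcast to v4i16 and the two leaves become extracts of elements 0 and 1.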
+bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
+ SDNode *N) {
+ // We perform this optimization post type-legalization because
+ // the type-legalizer often scalarizes integer-promoted vectors.
+  // Performing this optimization earlier may cause legalization cycles.
+ if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
+ return false;
+
+ // TODO: Add support for big-endian.
+ if (DAG.getDataLayout().isBigEndian())
+ return false;
+
+ SDValue VecOp = N->getOperand(0);
+ EVT VecVT = VecOp.getValueType();
+ assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
+
+ // We must start with a constant extraction index.
+ auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!IndexC)
+ return false;
+
+ assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
+         "Original ISD::EXTRACT_VECTOR_ELT is undefined?");
+
+ // TODO: deal with the case of implicit anyext of the extraction.
+ unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
+ EVT ScalarVT = N->getValueType(0);
+ if (VecVT.getScalarType() != ScalarVT)
+ return false;
+
+ // TODO: deal with the cases other than everything being integer-typed.
+ if (!ScalarVT.isScalarInteger())
+ return false;
+
+ struct Entry {
+ SDNode *Producer;
+
+ // Which bits of VecOp does it contain?
+ unsigned BitPos;
+ int NumBits;
+ // NOTE: the actual width of \p Producer may be wider than NumBits!
+
+ Entry(Entry &&) = default;
+ Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
+ : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
+
+ Entry() = delete;
+ Entry(const Entry &) = delete;
+ Entry &operator=(const Entry &) = delete;
+ Entry &operator=(Entry &&) = delete;
+ };
+ SmallVector<Entry, 32> Worklist;
+ SmallVector<Entry, 32> Leafs;
+
+ // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
+ Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
+ /*NumBits=*/VecEltBitWidth);
+
+ while (!Worklist.empty()) {
+ Entry E = Worklist.pop_back_val();
+ // Does the node not even use any of the VecOp bits?
+ if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
+ E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
+      return false; // Let's allow the other combines to clean this up first.
+ // Did we fail to model any of the users of the Producer?
+ bool ProducerIsLeaf = false;
+ // Look at each user of this Producer.
+ for (SDNode *User : E.Producer->uses()) {
+ switch (User->getOpcode()) {
+ // TODO: support ISD::BITCAST
+ // TODO: support ISD::ANY_EXTEND
+ // TODO: support ISD::ZERO_EXTEND
+ // TODO: support ISD::SIGN_EXTEND
+ case ISD::TRUNCATE:
+        // Truncation simply means we keep the position, but extract fewer bits.
+ Worklist.emplace_back(User, E.BitPos,
+ /*NumBits=*/User->getValueSizeInBits(0));
+ break;
+ // TODO: support ISD::SRA
+ // TODO: support ISD::SHL
+ case ISD::SRL:
+ // We should be shifting the Producer by a constant amount.
+ if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
+ User->getOperand(0).getNode() == E.Producer && ShAmtC) {
+ // Logical right-shift means that we start extraction later,
+ // but stop it at the same position we did previously.
+ unsigned ShAmt = ShAmtC->getZExtValue();
+ Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
+ break;
+ }
+ [[fallthrough]];
+ default:
+        // We cannot model this user of the Producer, which means the current
+        // Producer will be an ISD::EXTRACT_VECTOR_ELT.
+ ProducerIsLeaf = true;
+        // Profitability check: all users that we cannot model
+ // must be ISD::BUILD_VECTOR's.
+ if (User->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ break;
+ }
+ }
+ if (ProducerIsLeaf)
+ Leafs.emplace_back(std::move(E));
+ }
+
+ unsigned NewVecEltBitWidth = Leafs.front().NumBits;
+
+  // If we are still at the same element granularity, give up.
+ if (NewVecEltBitWidth == VecEltBitWidth)
+ return false;
+
+ // The vector width must be a multiple of the new element width.
+ if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
+ return false;
+
+ // All leafs must agree on the new element width.
+  // All leafs must not expect any "padding" bits on top of that width.
+  // All leafs must start extraction from a multiple of that width.
+ if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
+ return (unsigned)E.NumBits == NewVecEltBitWidth &&
+ E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
+ E.BitPos % NewVecEltBitWidth == 0;
+ }))
+ return false;
+
+ EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
+ VecVT.getSizeInBits() / NewVecEltBitWidth);
+
+ if (LegalTypes &&
+ !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
+ return false;
+
+ if (LegalOperations &&
+ !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
+ TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
+ return false;
+
+ SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
+ for (const Entry &E : Leafs) {
+ SDLoc DL(E.Producer);
+ unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
+ assert(NewIndex < NewVecVT.getVectorNumElements() &&
+ "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
+ SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
+ DAG.getVectorIdxConstant(NewIndex, DL));
+ CombineTo(E.Producer, V);
+ }
+
+ return true;
+}
+
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue VecOp = N->getOperand(0);
SDValue Index = N->getOperand(1);
@@ -19774,6 +20692,12 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
return DAG.getUNDEF(ScalarVT);
+ // extract_vector_elt(freeze(x)), idx -> freeze(extract_vector_elt(x)), idx
+ if (VecOp.hasOneUse() && VecOp.getOpcode() == ISD::FREEZE) {
+ return DAG.getFreeze(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
+ VecOp.getOperand(0), Index));
+ }
+
// extract_vector_elt (build_vector x, y), 1 -> y
if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
@@ -19819,7 +20743,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
SDValue BCSrc = VecOp.getOperand(0);
if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
- return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
+ return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
if (LegalTypes && BCSrc.getValueType().isInteger() &&
BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
@@ -19919,6 +20843,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
}
}
+ if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
+ return SDValue(N, 0);
+
// Everything under here is trying to match an extract of a loaded value.
// If the result of load has to be truncated, then it's not necessarily
// profitable.
@@ -20160,7 +21087,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
// Simplify (build_vec (trunc $1)
// (trunc (srl $1 half-width))
-// (trunc (srl $1 (2 * half-width))) …)
+// (trunc (srl $1 (2 * half-width))))
// to (bitcast $1)
SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
@@ -20313,6 +21240,29 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
ConcatOps[0] = VecIn2;
VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
+ } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
+ if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
+ !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
+ return SDValue();
+    // If the dest vector has fewer than two elements, then using a shuffle and
+    // an extract from larger regs will cost even more.
+ if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
+ return SDValue();
+ assert(InVT2Size <= InVT1Size &&
+ "Second input is not going to be larger than the first one.");
+
+ // VecIn1 is wider than the output, and we have another, possibly
+ // smaller input. Pad the smaller input with undefs, shuffle at the
+ // input vector width, and extract the output.
+ // The shuffle type is different than VT, so check legality again.
+ if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
+ return SDValue();
+
+ if (InVT1 != InVT2) {
+ VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
+ DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
+ }
+ ShuffleNumElems = InVT1Size / VTSize * NumElems;
} else {
// TODO: Support cases where the length mismatch isn't exactly by a
// factor of 2.
@@ -20753,6 +21703,127 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
VT, In);
}
+// If this is a very simple BUILD_VECTOR with the first element being a
+// ZERO_EXTEND, and all other elements being constant zeros, granularize the
+// BUILD_VECTOR's element width, absorbing the ZERO_EXTEND and turning it into
+// a constant zero op.
+// This pattern can appear during legalization.
+//
+// NOTE: This can be generalized to allow more than a single
+// non-constant-zero op, UNDEFs, and to be KnownBits-based.
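+// Editor's illustration (not part of the original change):
+// (v2i64 build_vector (zext i32 %x to i64), 0) can become
+// (v2i64 bitcast (v4i32 build_vector %x', 0, 0, 0)), where %x' is the truncated
+// first operand; on little-endian targets the bit patterns are identical.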
+SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
+ // Don't run this after legalization. Targets may have other preferences.
+ if (Level >= AfterLegalizeDAG)
+ return SDValue();
+
+ // FIXME: support big-endian.
+ if (DAG.getDataLayout().isBigEndian())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N->getOperand(0).getValueType();
+ assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
+
+ EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
+
+ if (!TLI.isTypeLegal(OpIntVT) ||
+ (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
+ return SDValue();
+
+ unsigned EltBitwidth = VT.getScalarSizeInBits();
+ // NOTE: the actual width of operands may be wider than that!
+
+ // Analyze all operands of this BUILD_VECTOR. What is the largest number of
+ // active bits they all have? We'll want to truncate them all to that width.
+ unsigned ActiveBits = 0;
+ APInt KnownZeroOps(VT.getVectorNumElements(), 0);
+ for (auto I : enumerate(N->ops())) {
+ SDValue Op = I.value();
+ // FIXME: support UNDEF elements?
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
+ unsigned OpActiveBits =
+ Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
+ if (OpActiveBits == 0) {
+ KnownZeroOps.setBit(I.index());
+ continue;
+ }
+ // Profitability check: don't allow non-zero constant operands.
+ return SDValue();
+ }
+ // Profitability check: there must only be a single non-zero operand,
+ // and it must be the first operand of the BUILD_VECTOR.
+ if (I.index() != 0)
+ return SDValue();
+ // The operand must be a zero-extension itself.
+ // FIXME: this could be generalized to known leading zeros check.
+ if (Op.getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+ unsigned CurrActiveBits =
+ Op.getOperand(0).getValueSizeInBits().getFixedValue();
+ assert(!ActiveBits && "Already encountered non-constant-zero operand?");
+ ActiveBits = CurrActiveBits;
+ // We want to at least halve the element size.
+ if (2 * ActiveBits > EltBitwidth)
+ return SDValue();
+ }
+
+ // This BUILD_VECTOR must have at least one non-constant-zero operand.
+ if (ActiveBits == 0)
+ return SDValue();
+
+  // We have EltBitwidth bits and the *minimal* chunk size is ActiveBits;
+  // into how many chunks can we split our element width?
+ EVT NewScalarIntVT, NewIntVT;
+ std::optional<unsigned> Factor;
+ // We can split the element into at least two chunks, but not into more
+  // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
+  // that divides the element width evenly and for which the resulting
+  // types/operations on that chunk width are legal.
+ assert(2 * ActiveBits <= EltBitwidth &&
+ "We know that half or less bits of the element are active.");
+ for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
+ if (EltBitwidth % Scale != 0)
+ continue;
+ unsigned ChunkBitwidth = EltBitwidth / Scale;
+ assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
+ NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
+ NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
+ Scale * N->getNumOperands());
+ if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
+ (LegalOperations &&
+ !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
+ TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
+ continue;
+ Factor = Scale;
+ break;
+ }
+ if (!Factor)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
+
+ // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
+ SmallVector<SDValue, 16> NewOps;
+ NewOps.reserve(NewIntVT.getVectorNumElements());
+ for (auto I : enumerate(N->ops())) {
+ SDValue Op = I.value();
+ assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
+ unsigned SrcOpIdx = I.index();
+ if (KnownZeroOps[SrcOpIdx]) {
+ NewOps.append(*Factor, ZeroOp);
+ continue;
+ }
+ Op = DAG.getBitcast(OpIntVT, Op);
+ Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
+ NewOps.emplace_back(Op);
+ NewOps.append(*Factor - 1, ZeroOp);
+ }
+ assert(NewOps.size() == NewIntVT.getVectorNumElements());
+ SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
+ NewBV = DAG.getBitcast(VT, NewBV);
+ return NewBV;
+}
+
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
@@ -20818,6 +21889,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (SDValue V = convertBuildVecZextToZext(N))
return V;
+ if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
+ return V;
+
if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;
@@ -21078,6 +22152,109 @@ static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(CastOpcode, DL, VT, NewConcat);
}
+// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
+// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
+// to that SHUFFLE_VECTOR, create a wider SHUFFLE_VECTOR.
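+// Editor's illustration (not part of the original change):
+// (v4i32 concat_vectors (vector_shuffle v2i32 X, undef, <1,0>), X) can become
+// (vector_shuffle (v4i32 concat_vectors X, undef), undef, <1,0,0,1>).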
+static SDValue combineConcatVectorOfShuffleAndItsOperands(
+ SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
+ bool LegalOperations) {
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N->getOperand(0).getValueType();
+ if (VT.isScalableVector())
+ return SDValue();
+
+ // For now, only allow simple 2-operand concatenations.
+ if (N->getNumOperands() != 2)
+ return SDValue();
+
+ // Don't create illegal types/shuffles when not allowed to.
+ if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
+ (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
+ return SDValue();
+
+ // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
+ // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
+ // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
+ // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
+ // (4) and for now, the SHUFFLE_VECTOR must be unary.
+ ShuffleVectorSDNode *SVN = nullptr;
+ for (SDValue Op : N->ops()) {
+ if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
+ CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
+ all_of(N->ops(), [CurSVN](SDValue Op) {
+ // FIXME: can we allow UNDEF operands?
+ return !Op.isUndef() &&
+ (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
+ })) {
+ SVN = CurSVN;
+ break;
+ }
+ }
+ if (!SVN)
+ return SDValue();
+
+  // We are going to pad the shuffle operands, so any index that was picking
+  // from the second operand must be adjusted.
+ SmallVector<int, 16> AdjustedMask;
+ AdjustedMask.reserve(SVN->getMask().size());
+ assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
+ append_range(AdjustedMask, SVN->getMask());
+
+ // Identity masks for the operands of the (padded) shuffle.
+ SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
+ MutableArrayRef<int> FirstShufOpIdentityMask =
+ MutableArrayRef<int>(IdentityMask)
+ .take_front(OpVT.getVectorNumElements());
+ MutableArrayRef<int> SecondShufOpIdentityMask =
+ MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
+ std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
+ std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
+ VT.getVectorNumElements());
+
+ // New combined shuffle mask.
+ SmallVector<int, 32> Mask;
+ Mask.reserve(VT.getVectorNumElements());
+ for (SDValue Op : N->ops()) {
+ assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
+ if (Op.getNode() == SVN) {
+ append_range(Mask, AdjustedMask);
+ continue;
+ }
+ if (Op == SVN->getOperand(0)) {
+ append_range(Mask, FirstShufOpIdentityMask);
+ continue;
+ }
+ if (Op == SVN->getOperand(1)) {
+ append_range(Mask, SecondShufOpIdentityMask);
+ continue;
+ }
+ llvm_unreachable("Unexpected operand!");
+ }
+
+ // Don't create illegal shuffle masks.
+ if (!TLI.isShuffleMaskLegal(Mask, VT))
+ return SDValue();
+
+ // Pad the shuffle operands with UNDEF.
+ SDLoc dl(N);
+ std::array<SDValue, 2> ShufOps;
+ for (auto I : zip(SVN->ops(), ShufOps)) {
+ SDValue ShufOp = std::get<0>(I);
+ SDValue &NewShufOp = std::get<1>(I);
+ if (ShufOp.isUndef())
+ NewShufOp = DAG.getUNDEF(VT);
+ else {
+ SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
+ DAG.getUNDEF(OpVT));
+ ShufOpParts[0] = ShufOp;
+ NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
+ }
+ }
+ // Finally, create the new wide shuffle.
+ return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
+}
+
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// If we only have one input vector, we don't need to do any concatenation.
if (N->getNumOperands() == 1)
@@ -21213,6 +22390,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (SDValue V = combineConcatVectorOfCasts(N, DAG))
return V;
+ if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
+ N, DAG, TLI, LegalTypes, LegalOperations))
+ return V;
+
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
// nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
// operands and look for a CONCAT operations that place the incoming vectors
@@ -21490,7 +22671,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
} else
- MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
+ MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
StoreSize);
SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
@@ -22050,14 +23231,53 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
+// Match shuffles that can be converted to *_vector_extend_in_reg.
+// This is often generated during legalization.
+// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
+// and returns the EVT to which the extension should be performed.
+// NOTE: this assumes that the src is the first operand of the shuffle.
+static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
+ unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
+ SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
+ bool LegalOperations) {
+ bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+
+ // TODO Add support for big-endian when we have a test case.
+ if (!VT.isInteger() || IsBigEndian)
+ return std::nullopt;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+
+ // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
+ // power-of-2 extensions as they are the most likely.
+  // FIXME: should try the Scale == NumElts case too.
+ for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
+ // The vector width must be a multiple of Scale.
+ if (NumElts % Scale != 0)
+ continue;
+
+ EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
+
+ if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
+ (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
+ continue;
+
+ if (Match(Scale))
+ return OutVT;
+ }
+
+ return std::nullopt;
+}
+
// Match shuffles that can be converted to any_vector_extend_in_reg.
// This is often generated during legalization.
// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
-// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
-static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
- SelectionDAG &DAG,
- const TargetLowering &TLI,
- bool LegalOperations) {
+static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool LegalOperations) {
EVT VT = SVN->getValueType(0);
bool IsBigEndian = DAG.getDataLayout().isBigEndian();
@@ -22065,13 +23285,9 @@ static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
if (!VT.isInteger() || IsBigEndian)
return SDValue();
- unsigned NumElts = VT.getVectorNumElements();
- unsigned EltSizeInBits = VT.getScalarSizeInBits();
- ArrayRef<int> Mask = SVN->getMask();
- SDValue N0 = SVN->getOperand(0);
-
// shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
- auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
+ auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
+ Mask = SVN->getMask()](unsigned Scale) {
for (unsigned i = 0; i != NumElts; ++i) {
if (Mask[i] < 0)
continue;
@@ -22082,27 +23298,138 @@ static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
return true;
};
- // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
- // power-of-2 extensions as they are the most likely.
- for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
- // Check for non power of 2 vector sizes
- if (NumElts % Scale != 0)
- continue;
- if (!isAnyExtend(Scale))
- continue;
+ unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
+ SDValue N0 = SVN->getOperand(0);
+ // Never create an illegal type. Only create unsupported operations if we
+ // are pre-legalization.
+ std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
+ Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
+ if (!OutVT)
+ return SDValue();
+ return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
+}
- EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
- EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
- // Never create an illegal type. Only create unsupported operations if we
- // are pre-legalization.
- if (TLI.isTypeLegal(OutVT))
- if (!LegalOperations ||
- TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
- return DAG.getBitcast(VT,
- DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
- SDLoc(SVN), OutVT, N0));
- }
+// Match shuffles that can be converted to zero_extend_vector_inreg.
+// This is often generated during legalization.
+// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
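+// Editor's illustration (not part of the original change): for
+// shuffle(X, all-zero vector, <0,4,1,4>), the elements demanded from the second
+// operand are known zero, so the mask is treated as <0,z,1,z> and matched as
+// (v2i64 zero_extend_vector_inreg X).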
+static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool LegalOperations) {
+ bool LegalTypes = true;
+ EVT VT = SVN->getValueType(0);
+ assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+
+ // TODO: add support for big-endian when we have a test case.
+ bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+ if (!VT.isInteger() || IsBigEndian)
+ return SDValue();
+ SmallVector<int, 16> Mask(SVN->getMask().begin(), SVN->getMask().end());
+ auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
+ for (int &Indice : Mask) {
+ if (Indice < 0)
+ continue;
+ int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
+ int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
+ Fn(Indice, OpIdx, OpEltIdx);
+ }
+ };
+
+ // Which elements of which operand does this shuffle demand?
+ std::array<APInt, 2> OpsDemandedElts;
+ for (APInt &OpDemandedElts : OpsDemandedElts)
+ OpDemandedElts = APInt::getZero(NumElts);
+ ForEachDecomposedIndice(
+ [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
+ OpsDemandedElts[OpIdx].setBit(OpEltIdx);
+ });
+
+  // Element-wise(!), which of these demanded elements are known to be zero?
+ std::array<APInt, 2> OpsKnownZeroElts;
+ for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
+ std::get<2>(I) =
+ DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
+
+ // Manifest zeroable element knowledge in the shuffle mask.
+  // NOTE: we don't have a 'zeroable' sentinel value in the generic DAG;
+  // this is a local invention, but it won't leak into the DAG.
+ // FIXME: should we not manifest them, but just check when matching?
+ bool HadZeroableElts = false;
+ ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
+ int &Indice, int OpIdx, int OpEltIdx) {
+ if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
+ Indice = -2; // Zeroable element.
+ HadZeroableElts = true;
+ }
+ });
+
+  // Don't proceed unless we've refined at least one zeroable mask index.
+ // If we didn't, then we are still trying to match the same shuffle mask
+ // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
+ // and evidently failed. Proceeding will lead to endless combine loops.
+ if (!HadZeroableElts)
+ return SDValue();
+
+ // The shuffle may be more fine-grained than we want. Widen elements first.
+ // FIXME: should we do this before manifesting zeroable shuffle mask indices?
+ SmallVector<int, 16> ScaledMask;
+ getShuffleMaskWithWidestElts(Mask, ScaledMask);
+ assert(Mask.size() >= ScaledMask.size() &&
+ Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
+ int Prescale = Mask.size() / ScaledMask.size();
+
+ NumElts = ScaledMask.size();
+ EltSizeInBits *= Prescale;
+
+ EVT PrescaledVT = EVT::getVectorVT(
+ *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
+ NumElts);
+
+ if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
+ return SDValue();
+
+ // For example,
+ // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
+ // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
+ auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
+ assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
+ "Unexpected mask scaling factor.");
+ ArrayRef<int> Mask = ScaledMask;
+ for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
+ SrcElt != NumSrcElts; ++SrcElt) {
+ // Analyze the shuffle mask in Scale-sized chunks.
+ ArrayRef<int> MaskChunk = Mask.take_front(Scale);
+ assert(MaskChunk.size() == Scale && "Unexpected mask size.");
+ Mask = Mask.drop_front(MaskChunk.size());
+      // The first index in this chunk must be SrcElt, but not zero!
+      // FIXME: undef should be fine, but that results in a more-defined result.
+ if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
+ return false;
+ // The rest of the indices in this chunk must be zeros.
+      // FIXME: undef should be fine, but that results in a more-defined result.
+ if (!all_of(MaskChunk.drop_front(1),
+ [](int Indice) { return Indice == -2; }))
+ return false;
+ }
+ assert(Mask.empty() && "Did not process the whole mask?");
+ return true;
+ };
+
+ unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
+ for (bool Commuted : {false, true}) {
+ SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
+ if (Commuted)
+ ShuffleVectorSDNode::commuteMask(ScaledMask);
+ std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
+ Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
+ LegalOperations);
+ if (OutVT)
+ return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
+ DAG.getBitcast(PrescaledVT, Op)));
+ }
return SDValue();
}
@@ -22174,9 +23501,52 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
// the masks of the shuffles.
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
SelectionDAG &DAG) {
+ EVT VT = Shuf->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
if (!Shuf->getOperand(1).isUndef())
return SDValue();
+ // See if this unary non-splat shuffle actually *is* a splat shuffle,
+ // in disguise, with all demanded elements being identical.
+ // FIXME: this can be done per-operand.
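+  // Editor's illustration (not part of the original change): for
+  // shuffle(X, undef, <1,3,1,3>) where X is itself a known splat value, the
+  // demanded elements 1 and 3 are identical, so the mask can be canonicalized
+  // to <1,1,1,1> (the lowest demanded element not known to be undef).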
+ if (!Shuf->isSplat()) {
+ APInt DemandedElts(NumElts, 0);
+ for (int Idx : Shuf->getMask()) {
+ if (Idx < 0)
+ continue; // Ignore sentinel indices.
+      assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle index?");
+ DemandedElts.setBit(Idx);
+ }
+ assert(DemandedElts.countPopulation() > 1 && "Is a splat shuffle already?");
+ APInt UndefElts;
+ if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
+ // Even if all demanded elements are splat, some of them could be undef.
+ // Which lowest demanded element is *not* known-undef?
+ std::optional<unsigned> MinNonUndefIdx;
+ for (int Idx : Shuf->getMask()) {
+ if (Idx < 0 || UndefElts[Idx])
+ continue; // Ignore sentinel indices, and undef elements.
+ MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
+ }
+ if (!MinNonUndefIdx)
+ return DAG.getUNDEF(VT); // All undef - result is undef.
+ assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
+ SmallVector<int, 8> SplatMask(Shuf->getMask().begin(),
+ Shuf->getMask().end());
+ for (int &Idx : SplatMask) {
+ if (Idx < 0)
+          continue; // Pass through sentinel indices.
+ // Otherwise, just pick the lowest demanded non-undef element.
+ // Or sentinel undef, if we know we'd pick a known-undef element.
+ Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
+ }
+ assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
+ return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
+ Shuf->getOperand(1), SplatMask);
+ }
+ }
+
// If the inner operand is a known splat with no undefs, just return that directly.
// TODO: Create DemandedElts mask from Shuf's mask.
// TODO: Allow undef elements and merge with the shuffle code below.
@@ -22360,7 +23730,7 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
// First, check if we are taking one element of a vector and shuffling that
// element into another vector.
ArrayRef<int> Mask = Shuf->getMask();
- SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
+ SmallVector<int, 16> CommutedMask(Mask);
SDValue Op0 = Shuf->getOperand(0);
SDValue Op1 = Shuf->getOperand(1);
int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
@@ -22514,6 +23884,23 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
if (Idx->getAPIntValue() == SplatIndex)
return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
+
+ // Look through a bitcast if LE and splatting lane 0, through to a
+ // scalar_to_vector or a build_vector.
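+      // Editor's illustration (not part of the original change): splatting
+      // lane 0 of (v8i8 bitcast (v2i32 build_vector %x, %y)) on little-endian
+      // only needs the low byte of %x, so it becomes a splat build_vector of a
+      // zext-or-truncated %x.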
+ if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
+ SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
+ (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
+ N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
+ EVT N00VT = N0.getOperand(0).getValueType();
+ if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
+ VT.isInteger() && N00VT.isInteger()) {
+ EVT InVT =
+ TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
+ SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
+ SDLoc(N), InVT);
+ return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
+ }
+ }
}
// If this is a bit convert that changes the element type of the vector but
@@ -22574,7 +23961,8 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return ShufOp;
// Match shuffles that can be converted to any_vector_extend_in_reg.
- if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
+ if (SDValue V =
+ combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
return V;
// Combine "truncate_vector_in_reg" style shuffles.
@@ -22671,7 +24059,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
return InsertN1;
if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
- SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
+ SmallVector<int> CommuteMask(Mask);
ShuffleVectorSDNode::commuteMask(CommuteMask);
if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
return InsertN0;
@@ -22707,24 +24095,31 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
SDLoc DL(N);
EVT IntVT = VT.changeVectorElementTypeToInteger();
EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
- SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
- SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
- SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
- for (int I = 0; I != (int)NumElts; ++I)
- if (0 <= Mask[I])
- AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
-
- // See if a clear mask is legal instead of going via
- // XformToShuffleWithZero which loses UNDEF mask elements.
- if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
- return DAG.getBitcast(
- VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
- DAG.getConstant(0, DL, IntVT), ClearMask));
+ // Transform the type to a legal type so that the buildvector constant
+ // elements are not illegal. Make sure that the result is larger than the
+      // original type, in case the value is split into two (e.g. i64->i32).
+ if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
+ IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
+ if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
+ SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
+ SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
+ SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
+ for (int I = 0; I != (int)NumElts; ++I)
+ if (0 <= Mask[I])
+ AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
+
+ // See if a clear mask is legal instead of going via
+ // XformToShuffleWithZero which loses UNDEF mask elements.
+ if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
+ return DAG.getBitcast(
+ VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
+ DAG.getConstant(0, DL, IntVT), ClearMask));
- if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
- return DAG.getBitcast(
- VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
- DAG.getBuildVector(IntVT, DL, AndMask)));
+ if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
+ return DAG.getBitcast(
+ VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
+ DAG.getBuildVector(IntVT, DL, AndMask)));
+ }
}
}
@@ -23053,55 +24448,101 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
return V;
+ // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
+ // Perform this really late, because it could eliminate knowledge
+ // of undef elements created by this shuffle.
+ if (Level < AfterLegalizeTypes)
+ if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
+ LegalOperations))
+ return V;
+
return SDValue();
}
SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
- SDValue InVal = N->getOperand(0);
EVT VT = N->getValueType(0);
+ if (!VT.isFixedLengthVector())
+ return SDValue();
+
+ // Try to convert a scalar binop with an extracted vector element to a vector
+ // binop. This is intended to reduce potentially expensive register moves.
+ // TODO: Check if both operands are extracted.
+ // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
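+  // Editor's illustration (not part of the original change):
+  // (v4i32 scalar_to_vector (add (extract_vector_elt %v, 2), 7)) can become
+  // (vector_shuffle (add %v, <7,7,7,7>), undef, <2,-1,-1,-1>), trading a
+  // vector-to-scalar move for a vector add.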
+ SDValue Scalar = N->getOperand(0);
+ unsigned Opcode = Scalar.getOpcode();
+ EVT VecEltVT = VT.getScalarType();
+ if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
+ TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
+ Scalar.getOperand(0).getValueType() == VecEltVT &&
+ Scalar.getOperand(1).getValueType() == VecEltVT &&
+ DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
+ // Match an extract element and get a shuffle mask equivalent.
+ SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
+
+ for (int i : {0, 1}) {
+ // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
+ // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
+ SDValue EE = Scalar.getOperand(i);
+ auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
+ if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ EE.getOperand(0).getValueType() == VT &&
+ isa<ConstantSDNode>(EE.getOperand(1))) {
+ // Mask = {ExtractIndex, undef, undef....}
+ ShufMask[0] = EE.getConstantOperandVal(1);
+ // Make sure the shuffle is legal if we are crossing lanes.
+ if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
+ SDLoc DL(N);
+ SDValue V[] = {EE.getOperand(0),
+ DAG.getConstant(C->getAPIntValue(), DL, VT)};
+ SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
+ return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
+ ShufMask);
+ }
+ }
+ }
+ }
// Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
// with a VECTOR_SHUFFLE and possible truncate.
- if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- VT.isFixedLengthVector() &&
- InVal->getOperand(0).getValueType().isFixedLengthVector()) {
- SDValue InVec = InVal->getOperand(0);
- SDValue EltNo = InVal->getOperand(1);
- auto InVecT = InVec.getValueType();
- if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
- SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
- int Elt = C0->getZExtValue();
- NewMask[0] = Elt;
- // If we have an implict truncate do truncate here as long as it's legal.
- // if it's not legal, this should
- if (VT.getScalarType() != InVal.getValueType() &&
- InVal.getValueType().isScalarInteger() &&
- isTypeLegal(VT.getScalarType())) {
- SDValue Val =
- DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
- }
- if (VT.getScalarType() == InVecT.getScalarType() &&
- VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
- SDValue LegalShuffle =
- TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
- DAG.getUNDEF(InVecT), NewMask, DAG);
- if (LegalShuffle) {
- // If the initial vector is the correct size this shuffle is a
- // valid result.
- if (VT == InVecT)
- return LegalShuffle;
- // If not we must truncate the vector.
- if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
- SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
- EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
- InVecT.getVectorElementType(),
- VT.getVectorNumElements());
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
- LegalShuffle, ZeroIdx);
- }
- }
- }
+ if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
+ !Scalar.getOperand(0).getValueType().isFixedLengthVector())
+ return SDValue();
+
+ // If we have an implicit truncate, truncate here if it is legal.
+ if (VecEltVT != Scalar.getValueType() &&
+ Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
+ SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
+ }
+
+ auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
+ if (!ExtIndexC)
+ return SDValue();
+
+ SDValue SrcVec = Scalar.getOperand(0);
+ EVT SrcVT = SrcVec.getValueType();
+ unsigned SrcNumElts = SrcVT.getVectorNumElements();
+ unsigned VTNumElts = VT.getVectorNumElements();
+ if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
+ // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
+ SmallVector<int, 8> Mask(SrcNumElts, -1);
+ Mask[0] = ExtIndexC->getZExtValue();
+ SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
+ SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
+ if (!LegalShuffle)
+ return SDValue();
+
+ // If the initial vector is the same size, the shuffle is the result.
+ if (VT == SrcVT)
+ return LegalShuffle;
+
+ // If not, shorten the shuffled vector.
+ if (VTNumElts != SrcNumElts) {
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
+ EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
+ SrcVT.getVectorElementType(), VTNumElts);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
+ ZeroIdx);
}
}
@@ -23331,6 +24772,15 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
}
SDValue DAGCombiner::visitVPOp(SDNode *N) {
+
+ if (N->getOpcode() == ISD::VP_GATHER)
+ if (SDValue SD = visitVPGATHER(N))
+ return SD;
+
+ if (N->getOpcode() == ISD::VP_SCATTER)
+ if (SDValue SD = visitVPSCATTER(N))
+ return SD;
+
// VP operations in which all vector elements are disabled - either by
// determining that the mask is all false or that the EVL is 0 - can be
// eliminated.
@@ -23499,10 +24949,40 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
}
// bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
- if (VT.isScalableVector())
- return DAG.getSplatVector(VT, DL, ScalarBO);
- SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
- return DAG.getBuildVector(VT, DL, Ops);
+ return DAG.getSplat(VT, DL, ScalarBO);
+}
+
+/// Visit a vector cast operation, like FP_EXTEND.
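+/// Editor's illustration (not part of the original change): when the source is
+/// a splat, e.g. (v4f64 fp_extend (v4f32 splat %x)), the cast can be scalarized
+/// to (v4f64 splat (f64 fp_extend %x)) when the target prefers that.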
+SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
+ EVT VT = N->getValueType(0);
+ assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
+ EVT EltVT = VT.getVectorElementType();
+ unsigned Opcode = N->getOpcode();
+
+ SDValue N0 = N->getOperand(0);
+ EVT SrcVT = N0->getValueType(0);
+ EVT SrcEltVT = SrcVT.getVectorElementType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // TODO: promoting the operation might also be good here?
+ int Index0;
+ SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
+ if (Src0 &&
+ (N0.getOpcode() == ISD::SPLAT_VECTOR ||
+ TLI.isExtractVecEltCheap(VT, Index0)) &&
+ TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
+ TLI.preferScalarizeSplat(Opcode)) {
+ SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
+ SDValue Elt =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
+ SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
+ if (VT.isScalableVector())
+ return DAG.getSplatVector(VT, DL, ScalarBO);
+ SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
+ return DAG.getBuildVector(VT, DL, Ops);
+ }
+
+ return SDValue();
}
/// Visit a binary vector operation, like ADD.
@@ -23522,9 +25002,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
// same types of operations that are in the original sequence. We do have to
// restrict ops like integer div that have immediate UB (eg, div-by-zero)
// though. This code is adapted from the identical transform in instcombine.
- if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
- Opcode != ISD::UREM && Opcode != ISD::SREM &&
- Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
+ if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
@@ -23542,7 +25020,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
// demanded elements analysis. It is further limited to not change a splat
// of an inserted scalar because that may be optimized better by
// load-folding or other target-specific behaviors.
- if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
+ if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
// binop (splat X), (splat C) --> splat (binop X, C)
@@ -23551,7 +25029,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
Shuf0->getMask());
}
- if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
+ if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
// binop (splat C), (splat X) --> splat (binop C, X)
@@ -23624,7 +25102,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2) {
- assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
+ assert(N0.getOpcode() == ISD::SETCC &&
+ "First argument must be a SetCC node!");
SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
cast<CondCodeSDNode>(N0.getOperand(2))->get());
@@ -24099,7 +25578,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
return V;
- // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
+ // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
// where y has a single bit set.
// A plaintext description would be, we can turn the SELECT_CC into an AND
// when the condition can be materialized as an all-ones register. Any
@@ -24550,7 +26029,7 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
bool IsAtomic;
SDValue BasePtr;
int64_t Offset;
- Optional<int64_t> NumBytes;
+ std::optional<int64_t> NumBytes;
MachineMemOperand *MMO;
};
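
The Optional<int64_t> -> std::optional<int64_t> change here is part of the move to the standard library type; the semantics are unchanged. A small self-contained illustration of how an optional size feeds an overlap check (MemUse and mayOverlap are hypothetical names used only for this sketch, not the code above):

#include <cstdint>
#include <cstdio>
#include <optional>

struct MemUse {
  int64_t Offset = 0;
  std::optional<int64_t> NumBytes; // empty == size unknown
};

bool mayOverlap(const MemUse &A, const MemUse &B) {
  // Without a known size on both sides we must conservatively assume overlap.
  if (!A.NumBytes || !B.NumBytes)
    return true;
  return A.Offset < B.Offset + *B.NumBytes && B.Offset < A.Offset + *A.NumBytes;
}

int main() {
  MemUse X{0, 4}, Y{8, 4}, Z{8, std::nullopt};
  std::printf("%d %d\n", mayOverlap(X, Y), mayOverlap(X, Z)); // 0 1
}
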
@@ -24565,21 +26044,26 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
: 0;
uint64_t Size =
MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
- return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
+ return {LSN->isVolatile(),
+ LSN->isAtomic(),
+ LSN->getBasePtr(),
Offset /*base offset*/,
- Optional<int64_t>(Size),
+ std::optional<int64_t>(Size),
LSN->getMemOperand()};
}
if (const auto *LN = cast<LifetimeSDNode>(N))
- return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
+ return {false /*isVolatile*/,
+ /*isAtomic*/ false,
+ LN->getOperand(1),
(LN->hasOffset()) ? LN->getOffset() : 0,
- (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
- : Optional<int64_t>(),
+ (LN->hasOffset()) ? std::optional<int64_t>(LN->getSize())
+ : std::optional<int64_t>(),
(MachineMemOperand *)nullptr};
// Default.
- return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
- (int64_t)0 /*offset*/,
- Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
+ return {false /*isvolatile*/,
+ /*isAtomic*/ false, SDValue(),
+ (int64_t)0 /*offset*/, std::optional<int64_t>() /*size*/,
+ (MachineMemOperand *)nullptr};
};
MemUseCharacteristics MUC0 = getCharacteristics(Op0),
@@ -24806,13 +26290,6 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return DAG.getTokenFactor(SDLoc(N), Aliases);
}
-namespace {
-// TODO: Replace with with std::monostate when we move to C++17.
-struct UnitT { } Unit;
-bool operator==(const UnitT &, const UnitT &) { return true; }
-bool operator!=(const UnitT &, const UnitT &) { return false; }
-} // namespace
-
// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
@@ -24833,8 +26310,8 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
// the common case, every store writes to the immediately previous address
// space and thus merged with the previous interval at insertion time.
- using IMap =
- llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
+ using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
+ IntervalMapHalfOpenInfo<int64_t>>;
IMap::Allocator A;
IMap Intervals(A);
@@ -24861,7 +26338,8 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
return false;
// Add ST's interval.
- Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
+ Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
+ std::monostate{});
while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
if (Chain->getMemoryVT().isScalableVector())
@@ -24890,7 +26368,7 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
// If there's a previous interval, we should start after it.
if (I != Intervals.begin() && (--I).stop() <= Offset)
break;
- Intervals.insert(Offset, Offset + Length, Unit);
+ Intervals.insert(Offset, Offset + Length, std::monostate{});
ChainedStores.push_back(Chain);
STChain = Chain;
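
UnitT existed only because this code predated C++17; std::monostate is an empty, regular type whose values all compare equal, so it can serve directly as the no-information payload of the interval map. A standalone sketch of the same pattern with a plain std::map:

#include <cstdint>
#include <cstdio>
#include <map>
#include <variant>

int main() {
  // Covered byte ranges keyed by start offset; the mapped value carries no
  // information, so std::monostate works as the payload type.
  std::map<int64_t, std::monostate> Covered;
  Covered.emplace(0, std::monostate{});
  Covered.emplace(8, std::monostate{});
  bool AlreadyCovered = Covered.count(8) != 0;
  // All monostate values compare equal, which is exactly what the removed
  // UnitT type hand-rolled.
  bool Equal = (std::monostate{} == std::monostate{});
  std::printf("%d %d %zu\n", AlreadyCovered, Equal, Covered.size()); // 1 1 2
}
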
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index ff5779967e22..2f2ae6e29855 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -42,7 +42,6 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
@@ -105,6 +104,7 @@
#include <cassert>
#include <cstdint>
#include <iterator>
+#include <optional>
#include <utility>
using namespace llvm;
@@ -319,7 +319,7 @@ Register FastISel::materializeConstant(const Value *V, MVT VT) {
Reg = lookUpRegForValue(Op);
} else if (isa<UndefValue>(V)) {
Reg = createResultReg(TLI.getRegClassFor(VT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
}
return Reg;
@@ -405,11 +405,6 @@ void FastISel::recomputeInsertPt() {
++FuncInfo.InsertPt;
} else
FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI();
-
- // Now skip past any EH_LABELs, which must remain at the beginning.
- while (FuncInfo.InsertPt != FuncInfo.MBB->end() &&
- FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL)
- ++FuncInfo.InsertPt;
}
void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
@@ -696,20 +691,20 @@ bool FastISel::selectStackmap(const CallInst *I) {
// Issue CALLSEQ_START
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
auto Builder =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown));
const MCInstrDesc &MCID = Builder.getInstr()->getDesc();
for (unsigned I = 0, E = MCID.getNumOperands(); I < E; ++I)
Builder.addImm(0);
// Issue STACKMAP.
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::STACKMAP));
for (auto const &MO : Ops)
MIB.add(MO);
// Issue CALLSEQ_END
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
.addImm(0)
.addImm(0);
@@ -878,7 +873,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
/*isImp=*/true));
// Insert the patchpoint instruction before the call generated by the target.
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc,
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, MIMD,
TII.get(TargetOpcode::PATCHPOINT));
for (auto &MO : Ops)
@@ -907,7 +902,7 @@ bool FastISel::selectXRayCustomEvent(const CallInst *I) {
Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
/*isDef=*/false));
MachineInstrBuilder MIB =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
for (auto &MO : Ops)
MIB.add(MO);
@@ -928,7 +923,7 @@ bool FastISel::selectXRayTypedEvent(const CallInst *I) {
Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)),
/*isDef=*/false));
MachineInstrBuilder MIB =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL));
for (auto &MO : Ops)
MIB.add(MO);
@@ -1139,9 +1134,8 @@ bool FastISel::lowerCall(const CallInst *CI) {
bool IsTailCall = CI->isTailCall();
if (IsTailCall && !isInTailCallPosition(*CI, TM))
IsTailCall = false;
- if (IsTailCall && MF->getFunction()
- .getFnAttribute("disable-tail-calls")
- .getValueAsBool())
+ if (IsTailCall && !CI->isMustTailCall() &&
+ MF->getFunction().getFnAttribute("disable-tail-calls").getValueAsBool())
IsTailCall = false;
CallLoweringInfo CLI;
@@ -1171,7 +1165,7 @@ bool FastISel::selectCall(const User *I) {
ExtraInfo |= InlineAsm::Extra_IsConvergent;
ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::INLINEASM));
MIB.addExternalSymbol(IA->getAsmString().c_str());
MIB.addImm(ExtraInfo);
@@ -1229,7 +1223,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX)
return true;
- Optional<MachineOperand> Op;
+ std::optional<MachineOperand> Op;
if (Register Reg = lookUpRegForValue(Address))
Op = MachineOperand::CreateReg(Reg, false);
@@ -1251,24 +1245,24 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
false);
if (Op) {
- assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
+ assert(DI->getVariable()->isValidLocationForIntrinsic(MIMD.getDL()) &&
"Expected inlined-at fields to agree");
- // A dbg.declare describes the address of a source variable, so lower it
- // into an indirect DBG_VALUE.
- auto Builder =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, *Op,
- DI->getVariable(), DI->getExpression());
-
- // If using instruction referencing, mutate this into a DBG_INSTR_REF,
- // to be later patched up by finalizeDebugInstrRefs. Tack a deref onto
- // the expression, we don't have an "indirect" flag in DBG_INSTR_REF.
- if (UseInstrRefDebugInfo && Op->isReg()) {
- Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF));
- Builder->getOperand(1).ChangeToImmediate(0);
- auto *NewExpr =
- DIExpression::prepend(DI->getExpression(), DIExpression::DerefBefore);
- Builder->getOperand(3).setMetadata(NewExpr);
+ if (FuncInfo.MF->useDebugInstrRef() && Op->isReg()) {
+ // If using instruction referencing, produce this as a DBG_INSTR_REF,
+ // to be later patched up by finalizeDebugInstrRefs. Tack a deref onto
+ // the expression, we don't have an "indirect" flag in DBG_INSTR_REF.
+ SmallVector<uint64_t, 3> Ops(
+ {dwarf::DW_OP_LLVM_arg, 0, dwarf::DW_OP_deref});
+ auto *NewExpr = DIExpression::prependOpcodes(DI->getExpression(), Ops);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(),
+ TII.get(TargetOpcode::DBG_INSTR_REF), /*IsIndirect*/ false, *Op,
+ DI->getVariable(), NewExpr);
+ } else {
+ // A dbg.declare describes the address of a source variable, so lower it
+ // into an indirect DBG_VALUE.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(),
+ TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, *Op,
+ DI->getVariable(), DI->getExpression());
}
} else {
// We can't yet handle anything else here because it would require
@@ -1283,12 +1277,12 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
const DbgValueInst *DI = cast<DbgValueInst>(II);
const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
const Value *V = DI->getValue();
- assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
+ assert(DI->getVariable()->isValidLocationForIntrinsic(MIMD.getDL()) &&
"Expected inlined-at fields to agree");
if (!V || isa<UndefValue>(V) || DI->hasArgList()) {
// DI is either undef or cannot produce a valid DBG_VALUE, so produce an
// undef DBG_VALUE to terminate any prior location.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, false, 0U,
DI->getVariable(), DI->getExpression());
} else if (const auto *CI = dyn_cast<ConstantInt>(V)) {
// See if there's an expression to constant-fold.
@@ -1296,35 +1290,42 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
if (Expr)
std::tie(Expr, CI) = Expr->constantFold(CI);
if (CI->getBitWidth() > 64)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addCImm(CI)
.addImm(0U)
.addMetadata(DI->getVariable())
.addMetadata(Expr);
else
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addImm(CI->getZExtValue())
.addImm(0U)
.addMetadata(DI->getVariable())
.addMetadata(Expr);
} else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addFPImm(CF)
.addImm(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
} else if (Register Reg = lookUpRegForValue(V)) {
// FIXME: This does not handle register-indirect values at offset 0.
- bool IsIndirect = false;
- auto Builder =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg,
- DI->getVariable(), DI->getExpression());
-
- // If using instruction referencing, mutate this into a DBG_INSTR_REF,
- // to be later patched up by finalizeDebugInstrRefs.
- if (UseInstrRefDebugInfo) {
- Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF));
- Builder->getOperand(1).ChangeToImmediate(0);
+ if (!FuncInfo.MF->useDebugInstrRef()) {
+ bool IsIndirect = false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, IsIndirect,
+ Reg, DI->getVariable(), DI->getExpression());
+ } else {
+ // If using instruction referencing, produce this as a DBG_INSTR_REF,
+ // to be later patched up by finalizeDebugInstrRefs.
+ SmallVector<MachineOperand, 1> MOs({MachineOperand::CreateReg(
+ /* Reg */ Reg, /* isDef */ false, /* isImp */ false,
+ /* isKill */ false, /* isDead */ false,
+ /* isUndef */ false, /* isEarlyClobber */ false,
+ /* SubReg */ 0, /* isDebug */ true)});
+ SmallVector<uint64_t, 2> Ops({dwarf::DW_OP_LLVM_arg, 0});
+ auto *NewExpr = DIExpression::prependOpcodes(DI->getExpression(), Ops);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(),
+ TII.get(TargetOpcode::DBG_INSTR_REF), /*IsIndirect*/ false, MOs,
+ DI->getVariable(), NewExpr);
}
} else {
// We don't know how to handle other cases, so we drop.
@@ -1340,7 +1341,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
return true;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel());
return true;
}
@@ -1448,7 +1449,7 @@ bool FastISel::selectFreeze(const User *I) {
MVT Ty = ETy.getSimpleVT();
const TargetRegisterClass *TyRegClass = TLI.getRegClassFor(Ty);
Register ResultReg = createResultReg(TyRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg);
updateValueMap(I, ResultReg);
@@ -1500,7 +1501,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
if (Call->getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet)
return false;
- DbgLoc = I->getDebugLoc();
+ MIMD = MIMetadata(*I);
SavedInsertPt = FuncInfo.InsertPt;
@@ -1525,7 +1526,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
if (!SkipTargetIndependentISel) {
if (selectOperator(I, I->getOpcode())) {
++NumFastIselSuccessIndependent;
- DbgLoc = DebugLoc();
+ MIMD = {};
return true;
}
// Remove dead code.
@@ -1537,7 +1538,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
// Next, try calling the target to attempt to handle the instruction.
if (fastSelectInstruction(I)) {
++NumFastIselSuccessTarget;
- DbgLoc = DebugLoc();
+ MIMD = {};
return true;
}
// Remove dead code.
@@ -1545,7 +1546,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
if (SavedInsertPt != FuncInfo.InsertPt)
removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
- DbgLoc = DebugLoc();
+ MIMD = {};
// Undo phi node updates, because they will be added again by SelectionDAG.
if (I->isTerminator()) {
// PHI node handling may have generated local value instructions.
@@ -1593,7 +1594,7 @@ void FastISel::finishCondBranch(const BasicBlock *BranchBB,
FuncInfo.MBB->addSuccessorWithoutProb(TrueMBB);
}
- fastEmitBranch(FalseMBB, DbgLoc);
+ fastEmitBranch(FalseMBB, MIMD.getDL());
}
/// Emit an FNeg operation.
@@ -1906,7 +1907,7 @@ Register FastISel::constrainOperandRegClass(const MCInstrDesc &II, Register Op,
// If it's not legal to COPY between the register classes, something
// has gone very wrong before we got here.
Register NewOp = createResultReg(RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::COPY), NewOp).addReg(Op);
return NewOp;
}
@@ -1919,7 +1920,7 @@ Register FastISel::fastEmitInst_(unsigned MachineInstOpcode,
Register ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg);
return ResultReg;
}
@@ -1931,13 +1932,14 @@ Register FastISel::fastEmitInst_r(unsigned MachineInstOpcode,
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
.addReg(Op0);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addReg(Op0);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
}
return ResultReg;
@@ -1953,15 +1955,16 @@ Register FastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
.addReg(Op0)
.addReg(Op1);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addReg(Op0)
.addReg(Op1);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
}
return ResultReg;
}
@@ -1977,17 +1980,18 @@ Register FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
.addReg(Op0)
.addReg(Op1)
.addReg(Op2);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addReg(Op0)
.addReg(Op1)
.addReg(Op2);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
}
return ResultReg;
}
@@ -2001,15 +2005,16 @@ Register FastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
.addReg(Op0)
.addImm(Imm);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addReg(Op0)
.addImm(Imm);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
}
return ResultReg;
}
@@ -2023,17 +2028,18 @@ Register FastISel::fastEmitInst_rii(unsigned MachineInstOpcode,
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
.addReg(Op0)
.addImm(Imm1)
.addImm(Imm2);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addReg(Op0)
.addImm(Imm1)
.addImm(Imm2);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
}
return ResultReg;
}
@@ -2046,13 +2052,14 @@ Register FastISel::fastEmitInst_f(unsigned MachineInstOpcode,
Register ResultReg = createResultReg(RC);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
.addFPImm(FPImm);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addFPImm(FPImm);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
}
return ResultReg;
}
@@ -2067,17 +2074,18 @@ Register FastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
.addReg(Op0)
.addReg(Op1)
.addImm(Imm);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addReg(Op0)
.addReg(Op1)
.addImm(Imm);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
}
return ResultReg;
}
@@ -2088,12 +2096,13 @@ Register FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
.addImm(Imm);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
}
return ResultReg;
}
@@ -2105,7 +2114,7 @@ Register FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
"Cannot yet extract from physregs");
const TargetRegisterClass *RC = MRI.getRegClass(Op0);
MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
ResultReg).addReg(Op0, 0, Idx);
return ResultReg;
}
@@ -2170,9 +2179,9 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// Set the DebugLoc for the copy. Use the location of the operand if
// there is one; otherwise no location, flushLocalValueMap will fix it.
- DbgLoc = DebugLoc();
+ MIMD = {};
if (const auto *Inst = dyn_cast<Instruction>(PHIOp))
- DbgLoc = Inst->getDebugLoc();
+ MIMD = MIMetadata(*Inst);
Register Reg = getRegForValue(PHIOp);
if (!Reg) {
@@ -2180,7 +2189,7 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
return false;
}
FuncInfo.PHINodesToUpdate.push_back(std::make_pair(&*MBBI++, Reg));
- DbgLoc = DebugLoc();
+ MIMD = {};
}
}
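
Throughout this file the bare DbgLoc member is replaced by MIMD, an MIMetadata bundle constructed from the instruction being selected. The sketch below is only a guess at the shape of that bundle (a debug location plus extra instruction-level metadata such as PC sections); the types and fields are illustrative stand-ins, not the real LLVM classes:

#include <cstdio>

struct DebugLoc { unsigned Line = 0; };
struct Instruction { DebugLoc DL; const char *PCSections = nullptr; };

// Hypothetical analogue of MIMetadata: carries the location plus whatever
// other per-instruction metadata needs to flow into BuildMI.
struct MIMetadata {
  MIMetadata() = default;
  explicit MIMetadata(const Instruction &I)
      : DL(I.DL), PCSections(I.PCSections) {}
  DebugLoc getDL() const { return DL; }
  DebugLoc DL;
  const char *PCSections = nullptr;
};

int main() {
  Instruction I{{42}, ".pcsec"};
  MIMetadata MIMD(I);            // mirrors "MIMD = MIMetadata(*I);" above
  std::printf("%u %s\n", MIMD.getDL().Line, MIMD.PCSections); // 42 .pcsec
  MIMD = {};                     // reset between instructions, as above
  std::printf("%u\n", MIMD.getDL().Line); // 0
}
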
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index aa9c77f9cabf..c18cd39ed296 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -119,10 +119,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
}
}
}
- if (Personality == EHPersonality::Wasm_CXX) {
- WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo();
- calculateWasmEHInfo(&fn, EHInfo);
- }
// Initialize the mapping of values to registers. This is only set up for
// instruction values that are used outside of the block that defines
@@ -154,7 +150,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
(TFI->isStackRealignable() || (Alignment <= StackAlign))) {
const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize());
uint64_t TySize =
- MF->getDataLayout().getTypeAllocSize(Ty).getKnownMinSize();
+ MF->getDataLayout().getTypeAllocSize(Ty).getKnownMinValue();
TySize *= CUI->getZExtValue(); // Get total allocated size.
if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
@@ -270,7 +266,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
// the first one should be marked.
if (BB.hasAddressTaken())
- MBB->setHasAddressTaken();
+ MBB->setAddressTakenIRBlock(const_cast<BasicBlock *>(&BB));
// Mark landing pad blocks.
if (BB.isEHPad())
@@ -323,10 +319,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
const auto *BB = CME.Handler.get<const BasicBlock *>();
CME.Handler = MBBMap[BB];
}
- }
-
- else if (Personality == EHPersonality::Wasm_CXX) {
+ } else if (Personality == EHPersonality::Wasm_CXX) {
WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo();
+ calculateWasmEHInfo(&fn, EHInfo);
+
// Map all BB references in the Wasm EH data to MBBs.
DenseMap<BBOrMBB, BBOrMBB> SrcToUnwindDest;
for (auto &KV : EHInfo.SrcToUnwindDest) {
@@ -369,8 +365,7 @@ void FunctionLoweringInfo::clear() {
/// CreateReg - Allocate a single virtual register for the given type.
Register FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
- return RegInfo->createVirtualRegister(
- MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent));
+ return RegInfo->createVirtualRegister(TLI->getRegClassFor(VT, isDivergent));
}
/// CreateRegs - Allocate the appropriate number of virtual registers of
@@ -381,8 +376,6 @@ Register FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
/// will assign registers for each member or element.
///
Register FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) {
- const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
-
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs);
@@ -451,8 +444,8 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
Register DestReg = It->second;
if (DestReg == 0)
- return
- assert(Register::isVirtualRegister(DestReg) && "Expected a virtual reg");
+ return;
+ assert(DestReg.isVirtual() && "Expected a virtual reg");
LiveOutRegInfo.grow(DestReg);
LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg];
@@ -475,7 +468,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
assert(ValueMap.count(V) && "V should have been placed in ValueMap when its"
"CopyToReg node was created.");
Register SrcReg = ValueMap[V];
- if (!Register::isVirtualRegister(SrcReg)) {
+ if (!SrcReg.isVirtual()) {
DestLOI.IsValid = false;
return;
}
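
The Register::isVirtualRegister(Reg) -> Reg.isVirtual() rewrites above are purely an idiom change. The simplified class below assumes the usual encoding in which virtual registers are tagged by the top bit; the real llvm::Register also reserves a range for stack slots, so treat this as a sketch rather than the actual implementation:

#include <cassert>
#include <cstdio>

class Register {
  unsigned Reg = 0;
public:
  static constexpr unsigned VirtualFlag = 1u << 31; // assumed tag bit
  Register() = default;
  constexpr Register(unsigned R) : Reg(R) {}
  constexpr bool isVirtual() const { return (Reg & VirtualFlag) != 0; }
  constexpr bool isPhysical() const { return Reg != 0 && !isVirtual(); }
};

int main() {
  Register Phys(3), Virt(Register::VirtualFlag | 0);
  std::printf("%d %d\n", Phys.isPhysical(), Virt.isVirtual()); // 1 1
  assert(!Register().isPhysical() && !Register().isVirtual()); // noreg
}
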
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 3d3b504c6abd..338172e4e10a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -70,7 +70,7 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1)))
continue;
if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1)))
- if (Register::isPhysicalRegister(RN->getReg()))
+ if (RN->getReg().isPhysical())
continue;
NumImpUses = N - I;
break;
@@ -81,9 +81,9 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
/// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an
/// implicit physical register output.
-void InstrEmitter::
-EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
- Register SrcReg, DenseMap<SDValue, Register> &VRBaseMap) {
+void InstrEmitter::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
+ Register SrcReg,
+ DenseMap<SDValue, Register> &VRBaseMap) {
Register VRBase;
if (SrcReg.isVirtual()) {
// Just use the input register directly!
@@ -106,51 +106,50 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
if (TLI->isTypeLegal(VT))
UseRC = TLI->getRegClassFor(VT, Node->isDivergent());
- if (!IsClone && !IsCloned)
- for (SDNode *User : Node->uses()) {
- bool Match = true;
- if (User->getOpcode() == ISD::CopyToReg &&
- User->getOperand(2).getNode() == Node &&
- User->getOperand(2).getResNo() == ResNo) {
- Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (DestReg.isVirtual()) {
- VRBase = DestReg;
- Match = false;
- } else if (DestReg != SrcReg)
- Match = false;
- } else {
- for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
- SDValue Op = User->getOperand(i);
- if (Op.getNode() != Node || Op.getResNo() != ResNo)
- continue;
- MVT VT = Node->getSimpleValueType(Op.getResNo());
- if (VT == MVT::Other || VT == MVT::Glue)
- continue;
- Match = false;
- if (User->isMachineOpcode()) {
- const MCInstrDesc &II = TII->get(User->getMachineOpcode());
- const TargetRegisterClass *RC = nullptr;
- if (i+II.getNumDefs() < II.getNumOperands()) {
- RC = TRI->getAllocatableClass(
- TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF));
- }
- if (!UseRC)
- UseRC = RC;
- else if (RC) {
- const TargetRegisterClass *ComRC =
+ for (SDNode *User : Node->uses()) {
+ bool Match = true;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (DestReg.isVirtual()) {
+ VRBase = DestReg;
+ Match = false;
+ } else if (DestReg != SrcReg)
+ Match = false;
+ } else {
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ SDValue Op = User->getOperand(i);
+ if (Op.getNode() != Node || Op.getResNo() != ResNo)
+ continue;
+ MVT VT = Node->getSimpleValueType(Op.getResNo());
+ if (VT == MVT::Other || VT == MVT::Glue)
+ continue;
+ Match = false;
+ if (User->isMachineOpcode()) {
+ const MCInstrDesc &II = TII->get(User->getMachineOpcode());
+ const TargetRegisterClass *RC = nullptr;
+ if (i + II.getNumDefs() < II.getNumOperands()) {
+ RC = TRI->getAllocatableClass(
+ TII->getRegClass(II, i + II.getNumDefs(), TRI, *MF));
+ }
+ if (!UseRC)
+ UseRC = RC;
+ else if (RC) {
+ const TargetRegisterClass *ComRC =
TRI->getCommonSubClass(UseRC, RC);
- // If multiple uses expect disjoint register classes, we emit
- // copies in AddRegisterOperand.
- if (ComRC)
- UseRC = ComRC;
- }
+ // If multiple uses expect disjoint register classes, we emit
+ // copies in AddRegisterOperand.
+ if (ComRC)
+ UseRC = ComRC;
}
}
}
- MatchReg &= Match;
- if (VRBase)
- break;
}
+ MatchReg &= Match;
+ if (VRBase)
+ break;
+ }
const TargetRegisterClass *SrcRC = nullptr, *DstRC = nullptr;
SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);
@@ -219,7 +218,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
RC = VTRC;
}
- if (II.OpInfo != nullptr && II.OpInfo[i].isOptionalDef()) {
+ if (!II.operands().empty() && II.operands()[i].isOptionalDef()) {
// Optional def must be a physical register.
VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
assert(VRBase.isPhysical());
@@ -231,8 +230,8 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
if (User->getOpcode() == ISD::CopyToReg &&
User->getOperand(2).getNode() == Node &&
User->getOperand(2).getResNo() == i) {
- unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (Register::isVirtualRegister(Reg)) {
+ Register Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (Reg.isVirtual()) {
const TargetRegisterClass *RegRC = MRI->getRegClass(Reg);
if (RegRC == RC) {
VRBase = Reg;
@@ -305,7 +304,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
const MCInstrDesc &MCID = MIB->getDesc();
bool isOptDef = IIOpNum < MCID.getNumOperands() &&
- MCID.OpInfo[IIOpNum].isOptionalDef();
+ MCID.operands()[IIOpNum].isOptionalDef();
// If the instruction requires a register in a different class, create
// a new virtual register and copy the value into it, but first attempt to
@@ -395,7 +394,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
(IIRC && TRI->isDivergentRegClass(IIRC)))
: nullptr;
- if (OpRC && IIRC && OpRC != IIRC && Register::isVirtualRegister(VReg)) {
+ if (OpRC && IIRC && OpRC != IIRC && VReg.isVirtual()) {
Register NewVReg = MRI->createVirtualRegister(IIRC);
BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
@@ -503,7 +502,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
Register Reg;
MachineInstr *DefMI;
RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0));
- if (R && Register::isPhysicalRegister(R->getReg())) {
+ if (R && R->getReg().isPhysical()) {
Reg = R->getReg();
DefMI = nullptr;
} else {
@@ -650,7 +649,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1));
// Skip physical registers as they don't have a vreg to get and we'll
// insert copies for them in TwoAddressInstructionPass anyway.
- if (!R || !Register::isPhysicalRegister(R->getReg())) {
+ if (!R || !R->getReg().isPhysical()) {
unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
@@ -678,43 +677,54 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
MachineInstr *
InstrEmitter::EmitDbgValue(SDDbgValue *SD,
DenseMap<SDValue, Register> &VRBaseMap) {
- MDNode *Var = SD->getVariable();
- MDNode *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
- assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ assert(cast<DILocalVariable>(SD->getVariable())
+ ->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
SD->setIsEmitted();
- ArrayRef<SDDbgOperand> LocationOps = SD->getLocationOps();
- assert(!LocationOps.empty() && "dbg_value with no location operands?");
+ assert(!SD->getLocationOps().empty() &&
+ "dbg_value with no location operands?");
if (SD->isInvalidated())
return EmitDbgNoLocation(SD);
- // Emit variadic dbg_value nodes as DBG_VALUE_LIST.
- if (SD->isVariadic()) {
- // DBG_VALUE_LIST := "DBG_VALUE_LIST" var, expression, loc (, loc)*
- const MCInstrDesc &DbgValDesc = TII->get(TargetOpcode::DBG_VALUE_LIST);
- // Build the DBG_VALUE_LIST instruction base.
- auto MIB = BuildMI(*MF, DL, DbgValDesc);
- MIB.addMetadata(Var);
- MIB.addMetadata(Expr);
- AddDbgValueLocationOps(MIB, DbgValDesc, LocationOps, VRBaseMap);
- return &*MIB;
- }
-
// Attempt to produce a DBG_INSTR_REF if we've been asked to.
- // We currently exclude the possibility of instruction references for
- // variadic nodes; if at some point we enable them, this should be moved
- // above the variadic block.
if (EmitDebugInstrRefs)
if (auto *InstrRef = EmitDbgInstrRef(SD, VRBaseMap))
return InstrRef;
+ // Emit variadic dbg_value nodes as DBG_VALUE_LIST if they have not been
+ // emitted as instruction references.
+ if (SD->isVariadic())
+ return EmitDbgValueList(SD, VRBaseMap);
+
+ // Emit single-location dbg_value nodes as DBG_VALUE if they have not been
+ // emitted as instruction references.
return EmitDbgValueFromSingleOp(SD, VRBaseMap);
}
+MachineOperand GetMOForConstDbgOp(const SDDbgOperand &Op) {
+ const Value *V = Op.getConst();
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64)
+ return MachineOperand::CreateCImm(CI);
+ return MachineOperand::CreateImm(CI->getSExtValue());
+ }
+ if (const ConstantFP *CF = dyn_cast<ConstantFP>(V))
+ return MachineOperand::CreateFPImm(CF);
+ // Note: This assumes that all nullptr constants are zero-valued.
+ if (isa<ConstantPointerNull>(V))
+ return MachineOperand::CreateImm(0);
+ // Undef or unhandled value type, so return an undef operand.
+ return MachineOperand::CreateReg(
+ /* Reg */ 0U, /* isDef */ false, /* isImp */ false,
+ /* isKill */ false, /* isDead */ false,
+ /* isUndef */ false, /* isEarlyClobber */ false,
+ /* SubReg */ 0, /* isDebug */ true);
+}
+
void InstrEmitter::AddDbgValueLocationOps(
MachineInstrBuilder &MIB, const MCInstrDesc &DbgValDesc,
ArrayRef<SDDbgOperand> LocationOps,
@@ -740,24 +750,9 @@ void InstrEmitter::AddDbgValueLocationOps(
AddOperand(MIB, V, (*MIB).getNumOperands(), &DbgValDesc, VRBaseMap,
/*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
} break;
- case SDDbgOperand::CONST: {
- const Value *V = Op.getConst();
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- if (CI->getBitWidth() > 64)
- MIB.addCImm(CI);
- else
- MIB.addImm(CI->getSExtValue());
- } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
- MIB.addFPImm(CF);
- } else if (isa<ConstantPointerNull>(V)) {
- // Note: This assumes that all nullptr constants are zero-valued.
- MIB.addImm(0);
- } else {
- // Could be an Undef. In any case insert an Undef so we can see what we
- // dropped.
- MIB.addReg(0U);
- }
- } break;
+ case SDDbgOperand::CONST:
+ MIB.add(GetMOForConstDbgOp(Op));
+ break;
}
}
}
@@ -765,116 +760,158 @@ void InstrEmitter::AddDbgValueLocationOps(
MachineInstr *
InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD,
DenseMap<SDValue, Register> &VRBaseMap) {
- assert(!SD->isVariadic());
- SDDbgOperand DbgOperand = SD->getLocationOps()[0];
MDNode *Var = SD->getVariable();
- DIExpression *Expr = (DIExpression*)SD->getExpression();
+ const DIExpression *Expr = (DIExpression *)SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF);
- // Handle variable locations that don't actually depend on the instructions
- // in the program: constants and stack locations.
- if (DbgOperand.getKind() == SDDbgOperand::FRAMEIX ||
- DbgOperand.getKind() == SDDbgOperand::CONST)
+ // Returns true if the given operand is not a legal debug operand for a
+ // DBG_INSTR_REF.
+ auto IsInvalidOp = [](SDDbgOperand DbgOp) {
+ return DbgOp.getKind() == SDDbgOperand::FRAMEIX;
+ };
+ // Returns true if the given operand is not itself an instruction reference
+ // but is a legal debug operand for a DBG_INSTR_REF.
+ auto IsNonInstrRefOp = [](SDDbgOperand DbgOp) {
+ return DbgOp.getKind() == SDDbgOperand::CONST;
+ };
+
+ // If this variable location does not depend on any instructions or contains
+ // any stack locations, produce it as a standard debug value instead.
+ if (any_of(SD->getLocationOps(), IsInvalidOp) ||
+ all_of(SD->getLocationOps(), IsNonInstrRefOp)) {
+ if (SD->isVariadic())
+ return EmitDbgValueList(SD, VRBaseMap);
return EmitDbgValueFromSingleOp(SD, VRBaseMap);
+ }
// Immediately fold any indirectness from the LLVM-IR intrinsic into the
// expression:
- if (SD->isIndirect()) {
- std::vector<uint64_t> Elts = {dwarf::DW_OP_deref};
- Expr = DIExpression::append(Expr, Elts);
- }
+ if (SD->isIndirect())
+ Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
+ // If this is not already a variadic expression, it must be modified to become
+ // one.
+ if (!SD->isVariadic())
+ Expr = DIExpression::convertToVariadicExpression(Expr);
+
+ SmallVector<MachineOperand> MOs;
// It may not be immediately possible to identify the MachineInstr that
// defines a VReg, it can depend for example on the order blocks are
// emitted in. When this happens, or when further analysis is needed later,
// produce an instruction like this:
//
- // DBG_INSTR_REF %0:gr64, 0, !123, !456
+ // DBG_INSTR_REF !123, !456, %0:gr64
//
// i.e., point the instruction at the vreg, and patch it up later in
// MachineFunction::finalizeDebugInstrRefs.
- auto EmitHalfDoneInstrRef = [&](unsigned VReg) -> MachineInstr * {
- auto MIB = BuildMI(*MF, DL, RefII);
- MIB.addReg(VReg);
- MIB.addImm(0);
- MIB.addMetadata(Var);
- MIB.addMetadata(Expr);
- return MIB;
+ auto AddVRegOp = [&](unsigned VReg) {
+ MOs.push_back(MachineOperand::CreateReg(
+ /* Reg */ VReg, /* isDef */ false, /* isImp */ false,
+ /* isKill */ false, /* isDead */ false,
+ /* isUndef */ false, /* isEarlyClobber */ false,
+ /* SubReg */ 0, /* isDebug */ true));
};
+ unsigned OpCount = SD->getLocationOps().size();
+ for (unsigned OpIdx = 0; OpIdx < OpCount; ++OpIdx) {
+ SDDbgOperand DbgOperand = SD->getLocationOps()[OpIdx];
+
+ // Try to find both the defined register and the instruction defining it.
+ MachineInstr *DefMI = nullptr;
+ unsigned VReg;
- // Try to find both the defined register and the instruction defining it.
- MachineInstr *DefMI = nullptr;
- unsigned VReg;
+ if (DbgOperand.getKind() == SDDbgOperand::VREG) {
+ VReg = DbgOperand.getVReg();
- if (DbgOperand.getKind() == SDDbgOperand::VREG) {
- VReg = DbgOperand.getVReg();
+ // No definition means that block hasn't been emitted yet. Leave a vreg
+ // reference to be fixed later.
+ if (!MRI->hasOneDef(VReg)) {
+ AddVRegOp(VReg);
+ continue;
+ }
- // No definition means that block hasn't been emitted yet. Leave a vreg
- // reference to be fixed later.
- if (!MRI->hasOneDef(VReg))
- return EmitHalfDoneInstrRef(VReg);
+ DefMI = &*MRI->def_instr_begin(VReg);
+ } else if (DbgOperand.getKind() == SDDbgOperand::SDNODE) {
+ // Look up the corresponding VReg for the given SDNode, if any.
+ SDNode *Node = DbgOperand.getSDNode();
+ SDValue Op = SDValue(Node, DbgOperand.getResNo());
+ DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op);
+ // No VReg -> produce a DBG_VALUE $noreg instead.
+ if (I == VRBaseMap.end())
+ break;
- DefMI = &*MRI->def_instr_begin(VReg);
- } else {
- assert(DbgOperand.getKind() == SDDbgOperand::SDNODE);
- // Look up the corresponding VReg for the given SDNode, if any.
- SDNode *Node = DbgOperand.getSDNode();
- SDValue Op = SDValue(Node, DbgOperand.getResNo());
- DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op);
- // No VReg -> produce a DBG_VALUE $noreg instead.
- if (I==VRBaseMap.end())
- return EmitDbgNoLocation(SD);
-
- // Try to pick out a defining instruction at this point.
- VReg = getVR(Op, VRBaseMap);
-
- // Again, if there's no instruction defining the VReg right now, fix it up
- // later.
- if (!MRI->hasOneDef(VReg))
- return EmitHalfDoneInstrRef(VReg);
-
- DefMI = &*MRI->def_instr_begin(VReg);
- }
+ // Try to pick out a defining instruction at this point.
+ VReg = getVR(Op, VRBaseMap);
- // Avoid copy like instructions: they don't define values, only move them.
- // Leave a virtual-register reference until it can be fixed up later, to find
- // the underlying value definition.
- if (DefMI->isCopyLike() || TII->isCopyInstr(*DefMI))
- return EmitHalfDoneInstrRef(VReg);
+ // Again, if there's no instruction defining the VReg right now, fix it up
+ // later.
+ if (!MRI->hasOneDef(VReg)) {
+ AddVRegOp(VReg);
+ continue;
+ }
- auto MIB = BuildMI(*MF, DL, RefII);
+ DefMI = &*MRI->def_instr_begin(VReg);
+ } else {
+ assert(DbgOperand.getKind() == SDDbgOperand::CONST);
+ MOs.push_back(GetMOForConstDbgOp(DbgOperand));
+ continue;
+ }
- // Find the operand number which defines the specified VReg.
- unsigned OperandIdx = 0;
- for (const auto &MO : DefMI->operands()) {
- if (MO.isReg() && MO.isDef() && MO.getReg() == VReg)
- break;
- ++OperandIdx;
+ // Avoid copy like instructions: they don't define values, only move them.
+ // Leave a virtual-register reference until it can be fixed up later, to
+ // find the underlying value definition.
+ if (DefMI->isCopyLike() || TII->isCopyInstr(*DefMI)) {
+ AddVRegOp(VReg);
+ continue;
+ }
+
+ // Find the operand number which defines the specified VReg.
+ unsigned OperandIdx = 0;
+ for (const auto &MO : DefMI->operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() == VReg)
+ break;
+ ++OperandIdx;
+ }
+ assert(OperandIdx < DefMI->getNumOperands());
+
+ // Make the DBG_INSTR_REF refer to that instruction, and that operand.
+ unsigned InstrNum = DefMI->getDebugInstrNum();
+ MOs.push_back(MachineOperand::CreateDbgInstrRef(InstrNum, OperandIdx));
}
- assert(OperandIdx < DefMI->getNumOperands());
- // Make the DBG_INSTR_REF refer to that instruction, and that operand.
- unsigned InstrNum = DefMI->getDebugInstrNum();
- MIB.addImm(InstrNum);
- MIB.addImm(OperandIdx);
- MIB.addMetadata(Var);
- MIB.addMetadata(Expr);
- return &*MIB;
+ // If we haven't created a valid MachineOperand for every DbgOp, abort and
+ // produce an undef DBG_VALUE.
+ if (MOs.size() != OpCount)
+ return EmitDbgNoLocation(SD);
+
+ return BuildMI(*MF, DL, RefII, false, MOs, Var, Expr);
}
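
EmitDbgInstrRef now walks every debug operand and picks a lowering per operand: frame indices force a fallback to a plain DBG_VALUE, constants become immediates, a vreg with a known unique def becomes an (instruction, operand) reference, and anything else is left as a vreg placeholder to be patched later. A simplified standalone model of that policy (names and types are illustrative, not LLVM API):

#include <cstdio>
#include <optional>
#include <string>
#include <vector>

enum class DbgOpKind { FrameIndex, Const, VReg };
struct DbgOp { DbgOpKind Kind; int Value; std::optional<int> DefInstr; };

// Decide how a single debug operand would be materialized.
std::string lowerOp(const DbgOp &Op) {
  switch (Op.Kind) {
  case DbgOpKind::FrameIndex: return "<use DBG_VALUE instead>";
  case DbgOpKind::Const:      return "imm " + std::to_string(Op.Value);
  case DbgOpKind::VReg:
    if (Op.DefInstr)
      return "instr-ref(" + std::to_string(*Op.DefInstr) + ")";
    return "vreg %" + std::to_string(Op.Value) + " (fix up later)";
  }
  return "";
}

int main() {
  std::vector<DbgOp> Ops = {{DbgOpKind::Const, 7, {}},
                            {DbgOpKind::VReg, 0, 12},
                            {DbgOpKind::VReg, 1, std::nullopt}};
  for (const auto &Op : Ops)
    std::printf("%s\n", lowerOp(Op).c_str());
}
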
MachineInstr *InstrEmitter::EmitDbgNoLocation(SDDbgValue *SD) {
// An invalidated SDNode must generate an undef DBG_VALUE: although the
// original value is no longer computed, earlier DBG_VALUEs live ranges
// must not leak into later code.
+ DIVariable *Var = SD->getVariable();
+ const DIExpression *Expr =
+ DIExpression::convertToUndefExpression(SD->getExpression());
+ DebugLoc DL = SD->getDebugLoc();
+ const MCInstrDesc &Desc = TII->get(TargetOpcode::DBG_VALUE);
+ return BuildMI(*MF, DL, Desc, false, 0U, Var, Expr);
+}
+
+MachineInstr *
+InstrEmitter::EmitDbgValueList(SDDbgValue *SD,
+ DenseMap<SDValue, Register> &VRBaseMap) {
MDNode *Var = SD->getVariable();
- MDNode *Expr = SD->getExpression();
+ DIExpression *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
- auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE));
- MIB.addReg(0U);
- MIB.addReg(0U);
+ // DBG_VALUE_LIST := "DBG_VALUE_LIST" var, expression, loc (, loc)*
+ const MCInstrDesc &DbgValDesc = TII->get(TargetOpcode::DBG_VALUE_LIST);
+ // Build the DBG_VALUE_LIST instruction base.
+ auto MIB = BuildMI(*MF, DL, DbgValDesc);
MIB.addMetadata(Var);
MIB.addMetadata(Expr);
+ AddDbgValueLocationOps(MIB, DbgValDesc, SD->getLocationOps(), VRBaseMap);
return &*MIB;
}
@@ -984,8 +1021,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses);
bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() &&
II.isVariadic() && II.variadicOpsAreDefs();
- bool HasPhysRegOuts = NumResults > NumDefs &&
- II.getImplicitDefs() != nullptr && !HasVRegVariadicDefs;
+ bool HasPhysRegOuts = NumResults > NumDefs && !II.implicit_defs().empty() &&
+ !HasVRegVariadicDefs;
#ifndef NDEBUG
unsigned NumMIOperands = NodeOperands + NumResults;
if (II.isVariadic())
@@ -993,8 +1030,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
"Too few operands for a variadic node!");
else
assert(NumMIOperands >= II.getNumOperands() &&
- NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() +
- NumImpUses &&
+ NumMIOperands <=
+ II.getNumOperands() + II.implicit_defs().size() + NumImpUses &&
"#operands for dag node doesn't match .td file!");
#endif
@@ -1063,6 +1100,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// part of the function.
MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands());
+ // Set the CFI type.
+ MIB->setCFIType(*MF, Node->getCFIType());
+
// Insert the instruction into position in the block. This needs to
// happen before any custom inserter hook is called so that the
// hook knows where in the block to insert the replacement code.
@@ -1088,12 +1128,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Additional results must be physical register defs.
if (HasPhysRegOuts) {
for (unsigned i = NumDefs; i < NumResults; ++i) {
- Register Reg = II.getImplicitDefs()[i - NumDefs];
+ Register Reg = II.implicit_defs()[i - NumDefs];
if (!Node->hasAnyUseOfValue(i))
continue;
// This implicitly defined physreg has a use.
UsedRegs.push_back(Reg);
- EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
+ EmitCopyFromReg(Node, i, IsClone, Reg, VRBaseMap);
}
}
@@ -1109,8 +1149,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
}
// Collect declared implicit uses.
const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
- UsedRegs.append(MCID.getImplicitUses(),
- MCID.getImplicitUses() + MCID.getNumImplicitUses());
+ append_range(UsedRegs, MCID.implicit_uses());
// In addition to declared implicit uses, we must also check for
// direct RegisterSDNode operands.
for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
@@ -1123,7 +1162,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
}
// Finally mark unused registers as dead.
- if (!UsedRegs.empty() || II.getImplicitDefs() || II.hasOptionalDef())
+ if (!UsedRegs.empty() || !II.implicit_defs().empty() || II.hasOptionalDef())
MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
// STATEPOINT is too 'dynamic' to have meaningful machine description.
@@ -1159,14 +1198,13 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
#endif
llvm_unreachable("This target-independent node should have been selected!");
case ISD::EntryToken:
- llvm_unreachable("EntryToken should have been excluded from the schedule!");
case ISD::MERGE_VALUES:
case ISD::TokenFactor: // fall thru
break;
case ISD::CopyToReg: {
Register DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
SDValue SrcVal = Node->getOperand(2);
- if (Register::isVirtualRegister(DestReg) && SrcVal.isMachineOpcode() &&
+ if (DestReg.isVirtual() && SrcVal.isMachineOpcode() &&
SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
// Instead building a COPY to that vreg destination, build an
// IMPLICIT_DEF instruction instead.
@@ -1189,7 +1227,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
}
case ISD::CopyFromReg: {
unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
- EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
+ EmitCopyFromReg(Node, 0, IsClone, SrcReg, VRBaseMap);
break;
}
case ISD::EH_LABEL:
@@ -1273,28 +1311,25 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
default: llvm_unreachable("Bad flags!");
case InlineAsm::Kind_RegDef:
for (unsigned j = 0; j != NumVals; ++j, ++i) {
- unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
// FIXME: Add dead flags for physical and virtual registers defined.
// For now, mark physical register defs as implicit to help fast
// regalloc. This makes inline asm look a lot like calls.
- MIB.addReg(Reg,
- RegState::Define |
- getImplRegState(Register::isPhysicalRegister(Reg)));
+ MIB.addReg(Reg, RegState::Define | getImplRegState(Reg.isPhysical()));
}
break;
case InlineAsm::Kind_RegDefEarlyClobber:
case InlineAsm::Kind_Clobber:
for (unsigned j = 0; j != NumVals; ++j, ++i) {
- unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
- MIB.addReg(Reg,
- RegState::Define | RegState::EarlyClobber |
- getImplRegState(Register::isPhysicalRegister(Reg)));
+ Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber |
+ getImplRegState(Reg.isPhysical()));
ECRegs.push_back(Reg);
}
break;
case InlineAsm::Kind_RegUse: // Use of register.
case InlineAsm::Kind_Imm: // Immediate.
- case InlineAsm::Kind_Mem: // Addressing mode.
+ case InlineAsm::Kind_Mem: // Non-function addressing mode.
// The addressing mode has been selected, just add all of the
// operands to the machine instruction.
for (unsigned j = 0; j != NumVals; ++j, ++i)
@@ -1312,6 +1347,21 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
}
}
break;
+ case InlineAsm::Kind_Func: // Function addressing mode.
+ for (unsigned j = 0; j != NumVals; ++j, ++i) {
+ SDValue Op = Node->getOperand(i);
+ AddOperand(MIB, Op, 0, nullptr, VRBaseMap,
+ /*IsDebug=*/false, IsClone, IsCloned);
+
+ // Adjust Target Flags for function reference.
+ if (auto *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ unsigned NewFlags =
+ MF->getSubtarget().classifyGlobalFunctionReference(
+ TGA->getGlobal());
+ unsigned LastIdx = MIB.getInstr()->getNumOperands() - 1;
+ MIB.getInstr()->getOperand(LastIdx).setTargetFlags(NewFlags);
+ }
+ }
}
}
@@ -1344,12 +1394,11 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
/// at the given position in the given block.
InstrEmitter::InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb,
- MachineBasicBlock::iterator insertpos,
- bool UseInstrRefDebugInfo)
+ MachineBasicBlock::iterator insertpos)
: MF(mbb->getParent()), MRI(&MF->getRegInfo()),
TII(MF->getSubtarget().getInstrInfo()),
TRI(MF->getSubtarget().getRegisterInfo()),
TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb),
InsertPos(insertpos) {
- EmitDebugInstrRefs = UseInstrRefDebugInfo;
+ EmitDebugInstrRefs = mbb->getParent()->useDebugInstrRef();
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index ced8f064b9be..959bce31c8b2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -44,10 +44,8 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
/// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an
/// implicit physical register output.
- void EmitCopyFromReg(SDNode *Node, unsigned ResNo,
- bool IsClone, bool IsCloned,
- Register SrcReg,
- DenseMap<SDValue, Register> &VRBaseMap);
+ void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
+ Register SrcReg, DenseMap<SDValue, Register> &VRBaseMap);
void CreateVirtualRegisters(SDNode *Node,
MachineInstrBuilder &MIB,
@@ -128,6 +126,10 @@ public:
/// Emit a DBG_VALUE $noreg, indicating a variable has no location.
MachineInstr *EmitDbgNoLocation(SDDbgValue *SD);
+ /// Emit a DBG_VALUE_LIST from the operands to SDDbgValue.
+ MachineInstr *EmitDbgValueList(SDDbgValue *SD,
+ DenseMap<SDValue, Register> &VRBaseMap);
+
/// Emit a DBG_VALUE from the operands to SDDbgValue.
MachineInstr *EmitDbgValueFromSingleOp(SDDbgValue *SD,
DenseMap<SDValue, Register> &VRBaseMap);
@@ -154,8 +156,7 @@ public:
/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
/// at the given position in the given block.
InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb,
- MachineBasicBlock::iterator insertpos,
- bool UseInstrRefDebugInfo);
+ MachineBasicBlock::iterator insertpos);
private:
void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 56d35dfe8701..c3106216a060 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -142,10 +142,12 @@ private:
RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128,
SmallVectorImpl<SDValue> &Results);
- SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
- RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
- RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128,
- RTLIB::Libcall Call_IEXT);
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128);
void ExpandArgFPLibCall(SDNode *Node,
RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
@@ -308,7 +310,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
// We don't want to shrink SNaNs. Converting the SNaN back to its real type
// can cause it to be changed into a QNaN on some platforms (e.g. on SystemZ).
if (!APF.isSignaling()) {
- while (SVT != MVT::f32 && SVT != MVT::f16) {
+ while (SVT != MVT::f32 && SVT != MVT::f16 && SVT != MVT::bf16) {
SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
if (ConstantFPSDNode::isValueValidForType(SVT, APF) &&
// Only do this if the target has a native EXTLOAD instruction from
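
The loop above now also considers bf16 when shrinking an FP constant, but the guard stays the same: only shrink when the value survives a round trip through the narrower type (and never for signaling NaNs, which the round trip could quiet). The same representability test in standalone C++, using float as the narrower type:

#include <cstdio>

// A double may be stored in a narrower type only if converting down and back
// up reproduces the exact value.
static bool fitsInFloat(double D) {
  return static_cast<double>(static_cast<float>(D)) == D;
}

int main() {
  std::printf("%d\n", fitsInFloat(0.5)); // 1: exactly representable
  std::printf("%d\n", fitsInFloat(0.1)); // 0: 0.1 has no exact float form
}
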
@@ -550,16 +552,16 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// Promote to a byte-sized store with upper bits zero if not
// storing an integral number of bytes. For example, promote
// TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StSize.getFixedSize());
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StSize.getFixedValue());
Value = DAG.getZeroExtendInReg(Value, dl, StVT);
SDValue Result =
DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT,
ST->getOriginalAlign(), MMOFlags, AAInfo);
ReplaceNode(SDValue(Node, 0), Result);
- } else if (!StVT.isVector() && !isPowerOf2_64(StWidth.getFixedSize())) {
+ } else if (!StVT.isVector() && !isPowerOf2_64(StWidth.getFixedValue())) {
// If not storing a power-of-2 number of bits, expand as two stores.
assert(!StVT.isVector() && "Unsupported truncstore!");
- unsigned StWidthBits = StWidth.getFixedSize();
+ unsigned StWidthBits = StWidth.getFixedValue();
unsigned LogStWidth = Log2_32(StWidthBits);
assert(LogStWidth < 32);
unsigned RoundWidth = 1 << LogStWidth;
@@ -767,10 +769,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Value = Result;
Chain = Ch;
- } else if (!isPowerOf2_64(SrcWidth.getKnownMinSize())) {
+ } else if (!isPowerOf2_64(SrcWidth.getKnownMinValue())) {
// If not loading a power-of-2 number of bits, expand as two loads.
assert(!SrcVT.isVector() && "Unsupported extload!");
- unsigned SrcWidthBits = SrcWidth.getFixedSize();
+ unsigned SrcWidthBits = SrcWidth.getFixedValue();
unsigned LogSrcWidth = Log2_32(SrcWidthBits);
assert(LogSrcWidth < 32);
unsigned RoundWidth = 1 << LogSrcWidth;
@@ -850,7 +852,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Custom:
isCustom = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case TargetLowering::Legal:
Value = SDValue(Node, 0);
Chain = SDValue(Node, 1);
@@ -1035,12 +1037,14 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
case ISD::SETCC:
+ case ISD::SETCCCARRY:
case ISD::VP_SETCC:
case ISD::BR_CC: {
unsigned Opc = Node->getOpcode();
unsigned CCOperand = Opc == ISD::SELECT_CC ? 4
: Opc == ISD::STRICT_FSETCC ? 3
: Opc == ISD::STRICT_FSETCCS ? 3
+ : Opc == ISD::SETCCCARRY ? 3
: (Opc == ISD::SETCC || Opc == ISD::VP_SETCC) ? 2
: 1;
unsigned CompareOperand = Opc == ISD::BR_CC ? 2
@@ -1074,7 +1078,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
SimpleFinishLegalizing = false;
break;
case ISD::EXTRACT_ELEMENT:
- case ISD::FLT_ROUNDS_:
+ case ISD::GET_ROUNDING:
case ISD::MERGE_VALUES:
case ISD::EH_RETURN:
case ISD::FRAME_TO_ARGS_OFFSET:
@@ -1317,11 +1321,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
return;
}
LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case TargetLowering::Expand:
if (ExpandNode(Node))
return;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case TargetLowering::LibCall:
ConvertNodeToLibcall(Node);
return;
@@ -1717,8 +1721,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
DAG.getConstant(-Alignment.value(), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
- Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
- DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
+ Tmp2 = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);
Results.push_back(Tmp1);
Results.push_back(Tmp2);
@@ -2111,17 +2114,15 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
ExpandFPLibCall(Node, LC, Results);
}
-SDValue SelectionDAGLegalize::ExpandIntLibCall(
- SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
- RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64,
- RTLIB::Libcall Call_I128, RTLIB::Libcall Call_IEXT) {
+SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128) {
RTLIB::Libcall LC;
switch (Node->getSimpleValueType(0).SimpleTy) {
-
- default:
- LC = Call_IEXT;
- break;
-
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC = Call_I8; break;
case MVT::i16: LC = Call_I16; break;
case MVT::i32: LC = Call_I32; break;
@@ -2156,11 +2157,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
RTLIB::Libcall LC;
switch (Node->getSimpleValueType(0).SimpleTy) {
-
- default:
- LC = isSigned ? RTLIB::SDIVREM_IEXT : RTLIB::UDIVREM_IEXT;
- break;
-
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
@@ -2744,7 +2741,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
FA, Offset));
break;
}
- case ISD::FLT_ROUNDS_:
+ case ISD::GET_ROUNDING:
Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0)));
Results.push_back(Node->getOperand(0));
break;
@@ -2911,13 +2908,44 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
case ISD::BF16_TO_FP: {
// Always expand bf16 to f32 casts, they lower to ext + shift.
- SDValue Op = DAG.getNode(ISD::BITCAST, dl, MVT::i16, Node->getOperand(0));
- Op = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op);
+ //
+ // Note that the operand of this node can be bf16 or an integer type in case
+ // bf16 is not supported on the target and was softened.
+ SDValue Op = Node->getOperand(0);
+ if (Op.getValueType() == MVT::bf16) {
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i16, Op));
+ } else {
+ Op = DAG.getAnyExtOrTrunc(Op, dl, MVT::i32);
+ }
Op = DAG.getNode(
ISD::SHL, dl, MVT::i32, Op,
DAG.getConstant(16, dl,
TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout())));
Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op);
+ // Add fp_extend in case the output is bigger than f32.
+ if (Node->getValueType(0) != MVT::f32)
+ Op = DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Op);
+ Results.push_back(Op);
+ break;
+ }
+ case ISD::FP_TO_BF16: {
+ SDValue Op = Node->getOperand(0);
+ if (Op.getValueType() != MVT::f32)
+ Op = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
+ Op = DAG.getNode(
+ ISD::SRL, dl, MVT::i32, DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op),
+ DAG.getConstant(16, dl,
+ TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout())));
+ // The result of this node can be bf16 or an integer type in case bf16 is
+ // not supported on the target and was softened to i16 for storage.
+ if (Node->getValueType(0) == MVT::bf16) {
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::bf16,
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Op));
+ } else {
+ Op = DAG.getAnyExtOrTrunc(Op, dl, Node->getValueType(0));
+ }
Results.push_back(Op);
break;
}
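A minimal host-side sketch of the bit manipulation these BF16_TO_FP / FP_TO_BF16 expansions produce, assuming bf16 is simply the upper 16 bits of an IEEE-754 f32; the helper names are hypothetical and this is ordinary C++ run on the host, not emitted DAG nodes:

  #include <cstdint>
  #include <cstring>

  // bf16 -> f32: ANY_EXTEND to i32, SHL by 16, then BITCAST to f32.
  static float bf16BitsToFloat(uint16_t Bits) {
    uint32_t Wide = uint32_t(Bits) << 16;
    float F;
    std::memcpy(&F, &Wide, sizeof(F));
    return F;
  }

  // f32 -> bf16: BITCAST to i32, SRL by 16, then TRUNCATE to i16.
  // Note this simply drops the low mantissa bits (no rounding), matching
  // the expansion above.
  static uint16_t floatToBF16Bits(float F) {
    uint32_t Wide;
    std::memcpy(&Wide, &F, sizeof(Wide));
    return uint16_t(Wide >> 16);
  }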
@@ -2961,7 +2989,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp2);
break;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
if ((Tmp1 = ExpandLegalINT_TO_FP(Node, Tmp2))) {
@@ -3112,7 +3140,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
case ISD::EXTRACT_ELEMENT: {
EVT OpTy = Node->getOperand(0).getValueType();
- if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+ if (Node->getConstantOperandVal(1)) {
// 1 -> Hi
Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
DAG.getConstant(OpTy.getSizeInBits() / 2, dl,
@@ -3251,8 +3279,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TLI.isOperationLegalOrCustom(ISD::FP_TO_FP16, MVT::f32)) {
// Under fastmath, we can expand this node into a fround followed by
// a float-half conversion.
- SDValue FloatVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op,
- DAG.getIntPtrConstant(0, dl));
+ SDValue FloatVal =
+ DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
Results.push_back(
DAG.getNode(ISD::FP_TO_FP16, dl, Node->getValueType(0), FloatVal));
}
@@ -4379,24 +4408,28 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::SUB_PPCF128, Results);
break;
case ISD::SREM:
- Results.push_back(ExpandIntLibCall(
- Node, true, RTLIB::SREM_I8, RTLIB::SREM_I16, RTLIB::SREM_I32,
- RTLIB::SREM_I64, RTLIB::SREM_I128, RTLIB::SREM_IEXT));
+ Results.push_back(ExpandIntLibCall(Node, true,
+ RTLIB::SREM_I8,
+ RTLIB::SREM_I16, RTLIB::SREM_I32,
+ RTLIB::SREM_I64, RTLIB::SREM_I128));
break;
case ISD::UREM:
- Results.push_back(ExpandIntLibCall(
- Node, false, RTLIB::UREM_I8, RTLIB::UREM_I16, RTLIB::UREM_I32,
- RTLIB::UREM_I64, RTLIB::UREM_I128, RTLIB::UREM_IEXT));
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::UREM_I8,
+ RTLIB::UREM_I16, RTLIB::UREM_I32,
+ RTLIB::UREM_I64, RTLIB::UREM_I128));
break;
case ISD::SDIV:
- Results.push_back(ExpandIntLibCall(
- Node, true, RTLIB::SDIV_I8, RTLIB::SDIV_I16, RTLIB::SDIV_I32,
- RTLIB::SDIV_I64, RTLIB::SDIV_I128, RTLIB::SDIV_IEXT));
+ Results.push_back(ExpandIntLibCall(Node, true,
+ RTLIB::SDIV_I8,
+ RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ RTLIB::SDIV_I64, RTLIB::SDIV_I128));
break;
case ISD::UDIV:
- Results.push_back(ExpandIntLibCall(
- Node, false, RTLIB::UDIV_I8, RTLIB::UDIV_I16, RTLIB::UDIV_I32,
- RTLIB::UDIV_I64, RTLIB::UDIV_I128, RTLIB::UDIV_IEXT));
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::UDIV_I8,
+ RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ RTLIB::UDIV_I64, RTLIB::UDIV_I128));
break;
case ISD::SDIVREM:
case ISD::UDIVREM:
@@ -4404,9 +4437,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
ExpandDivRemLibCall(Node, Results);
break;
case ISD::MUL:
- Results.push_back(ExpandIntLibCall(
- Node, false, RTLIB::MUL_I8, RTLIB::MUL_I16, RTLIB::MUL_I32,
- RTLIB::MUL_I64, RTLIB::MUL_I128, RTLIB::MUL_IEXT));
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::MUL_I8,
+ RTLIB::MUL_I16, RTLIB::MUL_I32,
+ RTLIB::MUL_I64, RTLIB::MUL_I128));
break;
case ISD::CTLZ_ZERO_UNDEF:
switch (Node->getSimpleValueType(0).SimpleTy) {
@@ -4696,7 +4730,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1);
else
Tmp1 = DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp1,
- DAG.getIntPtrConstant(0, dl));
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
Results.push_back(Tmp1);
break;
@@ -4756,8 +4790,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2,
Node->getFlags());
- Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
- Tmp3, DAG.getIntPtrConstant(0, dl)));
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
break;
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
@@ -4787,7 +4822,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(
DAG.getNode(ISD::FP_ROUND, dl, OVT,
DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3),
- DAG.getIntPtrConstant(0, dl)));
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
break;
case ISD::STRICT_FMA:
Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
@@ -4817,8 +4852,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
// (fp_round (fpext a))
// which is a no-op. Mark it as a TRUNCating FP_ROUND.
const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN);
- Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
- Tmp3, DAG.getIntPtrConstant(isTrunc, dl)));
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3,
+ DAG.getIntPtrConstant(isTrunc, dl, /*isTarget=*/true)));
break;
}
case ISD::STRICT_FPOWI:
@@ -4850,8 +4886,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FEXP2:
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
- Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
- Tmp2, DAG.getIntPtrConstant(0, dl)));
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
break;
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FCEIL:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index b2df67f45c72..f1e80ce7e037 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1071,8 +1071,9 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
if (ST->isTruncatingStore())
// Do an FP_ROUND followed by a non-truncating store.
- Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(),
- Val, DAG.getIntPtrConstant(0, dl)));
+ Val = BitConvertToInteger(
+ DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(), Val,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
else
Val = GetSoftenedFloat(Val);
@@ -2532,7 +2533,8 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_XINT_TO_FP(SDNode *N) {
// Round the value to the desired precision (that of the source type).
return DAG.getNode(
ISD::FP_EXTEND, DL, NVT,
- DAG.getNode(ISD::FP_ROUND, DL, VT, NV, DAG.getIntPtrConstant(0, DL)));
+ DAG.getNode(ISD::FP_ROUND, DL, VT, NV,
+ DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)));
}
SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) {
@@ -2746,39 +2748,47 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FCOPYSIGN(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FMAD(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1));
SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2));
SDLoc dl(N);
// Promote to the larger FP type.
- Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0);
- Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1);
- Op2 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op2);
+ auto PromotionOpcode = GetPromotionOpcode(OVT, NVT);
+ Op0 = DAG.getNode(PromotionOpcode, dl, NVT, Op0);
+ Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1);
+ Op2 = DAG.getNode(PromotionOpcode, dl, NVT, Op2);
SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1, Op2);
// Convert back to FP16 as an integer.
- return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
+ return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
}
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FPOWI(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
SDValue Op1 = N->getOperand(1);
SDLoc dl(N);
- Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0);
+ // Promote to the larger FP type.
+ Op0 = DAG.getNode(GetPromotionOpcode(OVT, NVT), dl, NVT, Op0);
SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1);
// Convert back to FP16 as an integer.
- return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
+ return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
}
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ EVT SVT = N->getOperand(0).getValueType();
+
if (N->isStrictFPOpcode()) {
+ assert(RVT == MVT::f16);
SDValue Res =
DAG.getNode(ISD::STRICT_FP_TO_FP16, SDLoc(N), {MVT::i16, MVT::Other},
{N->getOperand(0), N->getOperand(1)});
@@ -2786,7 +2796,8 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) {
return Res;
}
- return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), MVT::i16, N->getOperand(0));
+ return DAG.getNode(GetPromotionOpcode(SVT, RVT), SDLoc(N), MVT::i16,
+ N->getOperand(0));
}
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_LOAD(SDNode *N) {
@@ -2821,13 +2832,14 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_SELECT_CC(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_XINT_TO_FP(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
SDLoc dl(N);
SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
// Round the value to the softened type.
- return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
+ return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
}
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UNDEF(SDNode *N) {
@@ -2835,33 +2847,36 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UNDEF(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UnaryOp(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
SDValue Op = GetSoftPromotedHalf(N->getOperand(0));
SDLoc dl(N);
// Promote to the larger FP type.
- Op = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op);
+ Op = DAG.getNode(GetPromotionOpcode(OVT, NVT), dl, NVT, Op);
SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op);
// Convert back to FP16 as an integer.
- return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
+ return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
}
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BinOp(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1));
SDLoc dl(N);
// Promote to the larger FP type.
- Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0);
- Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1);
+ auto PromotionOpcode = GetPromotionOpcode(OVT, NVT);
+ Op0 = DAG.getNode(PromotionOpcode, dl, NVT, Op0);
+ Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1);
SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1);
// Convert back to FP16 as an integer.
- return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
+ return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
}
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_VECREDUCE(SDNode *N) {
@@ -2945,22 +2960,27 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FCOPYSIGN(SDNode *N,
unsigned OpNo) {
assert(OpNo == 1 && "Only Operand 1 must need promotion here");
SDValue Op1 = N->getOperand(1);
+ EVT RVT = Op1.getValueType();
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op1.getValueType());
Op1 = GetSoftPromotedHalf(Op1);
- Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1);
+ Op1 = DAG.getNode(GetPromotionOpcode(RVT, NVT), dl, NVT, Op1);
return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), N->getOperand(0),
Op1);
}
SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
+ EVT RVT = N->getValueType(0);
bool IsStrict = N->isStrictFPOpcode();
- SDValue Op = GetSoftPromotedHalf(N->getOperand(IsStrict ? 1 : 0));
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ EVT SVT = Op.getValueType();
+ Op = GetSoftPromotedHalf(N->getOperand(IsStrict ? 1 : 0));
if (IsStrict) {
+ assert(SVT == MVT::f16);
SDValue Res =
DAG.getNode(ISD::STRICT_FP16_TO_FP, SDLoc(N),
{N->getValueType(0), MVT::Other}, {N->getOperand(0), Op});
@@ -2969,31 +2989,35 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
return SDValue();
}
- return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), Op);
+ return DAG.getNode(GetPromotionOpcode(SVT, RVT), SDLoc(N), RVT, Op);
}
SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
SDValue Op = N->getOperand(0);
+ EVT SVT = Op.getValueType();
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
Op = GetSoftPromotedHalf(Op);
- SDValue Res = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op);
+ SDValue Res = DAG.getNode(GetPromotionOpcode(SVT, RVT), dl, NVT, Op);
return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res);
}
SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
SDValue Op = N->getOperand(0);
+ EVT SVT = Op.getValueType();
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
Op = GetSoftPromotedHalf(Op);
- SDValue Res = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op);
+ SDValue Res = DAG.getNode(GetPromotionOpcode(SVT, RVT), dl, NVT, Op);
return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res,
N->getOperand(1));
@@ -3006,14 +3030,16 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SELECT_CC(SDNode *N,
SDValue Op1 = N->getOperand(1);
SDLoc dl(N);
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op0.getValueType());
+ EVT SVT = Op0.getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), SVT);
Op0 = GetSoftPromotedHalf(Op0);
Op1 = GetSoftPromotedHalf(Op1);
// Promote to the larger FP type.
- Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0);
- Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1);
+ auto PromotionOpcode = GetPromotionOpcode(SVT, NVT);
+ Op0 = DAG.getNode(PromotionOpcode, dl, NVT, Op0);
+ Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1);
return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0), Op0, Op1,
N->getOperand(2), N->getOperand(3), N->getOperand(4));
@@ -3025,14 +3051,16 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SETCC(SDNode *N) {
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
SDLoc dl(N);
+ EVT SVT = Op0.getValueType();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op0.getValueType());
Op0 = GetSoftPromotedHalf(Op0);
Op1 = GetSoftPromotedHalf(Op1);
// Promote to the larger FP type.
- Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0);
- Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1);
+ auto PromotionOpcode = GetPromotionOpcode(SVT, NVT);
+ Op0 = DAG.getNode(PromotionOpcode, dl, NVT, Op0);
+ Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1);
return DAG.getSetCC(SDLoc(N), N->getValueType(0), Op0, Op1, CCCode);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 228d4a43ccde..c9ce9071a25d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -137,8 +137,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
- case ISD::VP_FPTOSI:
- case ISD::VP_FPTOUI:
+ case ISD::VP_FP_TO_SINT:
+ case ISD::VP_FP_TO_UINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
@@ -148,9 +148,12 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_UINT_SAT:
Res = PromoteIntRes_FP_TO_XINT_SAT(N); break;
- case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break;
+ case ISD::FP_TO_BF16:
+ case ISD::FP_TO_FP16:
+ Res = PromoteIntRes_FP_TO_FP16_BF16(N);
+ break;
- case ISD::FLT_ROUNDS_: Res = PromoteIntRes_FLT_ROUNDS(N); break;
+ case ISD::GET_ROUNDING: Res = PromoteIntRes_GET_ROUNDING(N); break;
case ISD::AND:
case ISD::OR:
@@ -165,11 +168,15 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::VP_SUB:
case ISD::VP_MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+ case ISD::VP_SMIN:
+ case ISD::VP_SMAX:
case ISD::SDIV:
case ISD::SREM:
case ISD::VP_SDIV:
case ISD::VP_SREM: Res = PromoteIntRes_SExtIntBinOp(N); break;
+ case ISD::VP_UMIN:
+ case ISD::VP_UMAX:
case ISD::UDIV:
case ISD::UREM:
case ISD::VP_UDIV:
@@ -673,10 +680,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
NewOpc = ISD::STRICT_FP_TO_SINT;
- if (N->getOpcode() == ISD::VP_FPTOUI &&
- !TLI.isOperationLegal(ISD::VP_FPTOUI, NVT) &&
- TLI.isOperationLegalOrCustom(ISD::VP_FPTOSI, NVT))
- NewOpc = ISD::VP_FPTOSI;
+ if (N->getOpcode() == ISD::VP_FP_TO_UINT &&
+ !TLI.isOperationLegal(ISD::VP_FP_TO_UINT, NVT) &&
+ TLI.isOperationLegalOrCustom(ISD::VP_FP_TO_SINT, NVT))
+ NewOpc = ISD::VP_FP_TO_SINT;
SDValue Res;
if (N->isStrictFPOpcode()) {
@@ -685,7 +692,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
- } else if (NewOpc == ISD::VP_FPTOSI || NewOpc == ISD::VP_FPTOUI) {
+ } else if (NewOpc == ISD::VP_FP_TO_SINT || NewOpc == ISD::VP_FP_TO_UINT) {
Res = DAG.getNode(NewOpc, dl, NVT, {N->getOperand(0), N->getOperand(1),
N->getOperand(2)});
} else {
@@ -701,7 +708,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
// after legalization: fp-to-sint32, 65534. -> 0x0000fffe
return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT ||
N->getOpcode() == ISD::STRICT_FP_TO_UINT ||
- N->getOpcode() == ISD::VP_FPTOUI)
+ N->getOpcode() == ISD::VP_FP_TO_UINT)
? ISD::AssertZext
: ISD::AssertSext,
dl, NVT, Res,
@@ -716,14 +723,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT_SAT(SDNode *N) {
N->getOperand(1));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16_BF16(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_FLT_ROUNDS(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_GET_ROUNDING(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
@@ -836,7 +843,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
SDLoc dl(N);
SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(VT, SVT),
- makeArrayRef(Ops, NumOps));
+ ArrayRef(Ops, NumOps));
// Modified the sum result - switch anything that used the old sum to use
// the new one.
@@ -1555,7 +1562,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_VSCALE(SDNode *N) {
EVT VT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- APInt MulImm = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
+ const APInt &MulImm = N->getConstantOperandAPInt(0);
return DAG.getVScale(SDLoc(N), VT, MulImm.sext(VT.getSizeInBits()));
}
@@ -1648,7 +1655,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::VP_SETCC:
case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
- case ISD::VP_SITOFP:
+ case ISD::VP_SINT_TO_FP:
case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break;
case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
@@ -1663,8 +1670,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
OpNo); break;
case ISD::VP_TRUNCATE:
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
+ case ISD::BF16_TO_FP:
case ISD::FP16_TO_FP:
- case ISD::VP_UITOFP:
+ case ISD::VP_UINT_TO_FP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
@@ -1998,7 +2006,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
- if (N->getOpcode() == ISD::VP_SITOFP)
+ if (N->getOpcode() == ISD::VP_SINT_TO_FP)
return SDValue(DAG.UpdateNodeOperands(N,
SExtPromotedInteger(N->getOperand(0)),
N->getOperand(1), N->getOperand(2)),
@@ -2127,7 +2135,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
- if (N->getOpcode() == ISD::VP_UITOFP)
+ if (N->getOpcode() == ISD::VP_UINT_TO_FP)
return SDValue(DAG.UpdateNodeOperands(N,
ZExtPromotedInteger(N->getOperand(0)),
N->getOperand(1), N->getOperand(2)),
@@ -2420,17 +2428,21 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
- case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break;
+ case ISD::GET_ROUNDING:ExpandIntRes_GET_ROUNDING(N, Lo, Hi); break;
case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT: ExpandIntRes_FP_TO_XINT_SAT(N, Lo, Hi); break;
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LRINT:
+ case ISD::LROUND:
+ case ISD::LRINT:
case ISD::STRICT_LLROUND:
case ISD::STRICT_LLRINT:
case ISD::LLROUND:
- case ISD::LLRINT: ExpandIntRes_LLROUND_LLRINT(N, Lo, Hi); break;
+ case ISD::LLRINT: ExpandIntRes_XROUND_XRINT(N, Lo, Hi); break;
case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
@@ -2866,15 +2878,29 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
ISD::CondCode CondC;
std::tie(CondC, LoOpc) = getExpandedMinMaxOps(N->getOpcode());
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
- GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
- GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ GetExpandedInteger(LHS, LHSL, LHSH);
+ GetExpandedInteger(RHS, RHSL, RHSH);
// Value types
EVT NVT = LHSL.getValueType();
EVT CCT = getSetCCResultType(NVT);
+ // If the upper halves are all sign bits, then we can perform the MINMAX on
+ // the lower half and sign-extend the result to the upper half.
+ unsigned NumHalfBits = NVT.getScalarSizeInBits();
+ if (DAG.ComputeNumSignBits(LHS) > NumHalfBits &&
+ DAG.ComputeNumSignBits(RHS) > NumHalfBits) {
+ Lo = DAG.getNode(N->getOpcode(), DL, NVT, LHSL, RHSL);
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, Lo,
+ DAG.getShiftAmountConstant(NumHalfBits - 1, NVT, DL));
+ return;
+ }
+
// Hi part is always the same op
Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH});
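A concrete illustration of the new early-out: expanding an i64 smax whose operands are both known to fit in i32 (more than 32 sign bits), e.g. smax(-5, 3), becomes Lo = smax(0xFFFFFFFB, 0x00000003) = 3 as a single i32 op and Hi = Lo >>s 31 = 0, reproducing the i64 result 3 without the full two-register compare-and-select lowering.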
@@ -2913,13 +2939,13 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.computeKnownBits(HiOps[2]).isZero()
- ? DAG.getNode(ISD::UADDO, dl, VTList, makeArrayRef(HiOps, 2))
+ ? DAG.getNode(ISD::UADDO, dl, VTList, ArrayRef(HiOps, 2))
: DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps);
} else {
Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.computeKnownBits(HiOps[2]).isZero()
- ? DAG.getNode(ISD::USUBO, dl, VTList, makeArrayRef(HiOps, 2))
+ ? DAG.getNode(ISD::USUBO, dl, VTList, ArrayRef(HiOps, 2))
: DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps);
}
return;
@@ -2962,18 +2988,18 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
if (N->getOpcode() == ISD::ADD) {
RevOpc = ISD::SUB;
Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, ArrayRef(HiOps, 2));
} else {
RevOpc = ISD::ADD;
Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps);
- Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2));
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, ArrayRef(HiOps, 2));
}
SDValue OVF = Lo.getValue(1);
switch (BoolType) {
case TargetLoweringBase::UndefinedBooleanContent:
OVF = DAG.getNode(ISD::AND, dl, OvfVT, DAG.getConstant(1, dl, OvfVT), OVF);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case TargetLoweringBase::ZeroOrOneBooleanContent:
OVF = DAG.getZExtOrTrunc(OVF, dl, NVT);
Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF);
@@ -2987,27 +3013,21 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
if (N->getOpcode() == ISD::ADD) {
Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
- SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
- ISD::SETULT);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, ArrayRef(HiOps, 2));
+ SDValue Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
+ ISD::SETULT);
- if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) {
- SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry);
- return;
- }
+ SDValue Carry;
+ if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent)
+ Carry = DAG.getZExtOrTrunc(Cmp, dl, NVT);
+ else
+ Carry = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT),
+ DAG.getConstant(0, dl, NVT));
- SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1,
- DAG.getConstant(1, dl, NVT),
- DAG.getConstant(0, dl, NVT));
- SDValue Cmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[1],
- ISD::SETULT);
- SDValue Carry2 = DAG.getSelect(dl, NVT, Cmp2,
- DAG.getConstant(1, dl, NVT), Carry1);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry);
} else {
Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps);
- Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2));
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, ArrayRef(HiOps, 2));
SDValue Cmp =
DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()),
LoOps[0], LoOps[1], ISD::SETULT);
@@ -3280,6 +3300,14 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
GetExpandedInteger(N0, Lo, Hi);
EVT NVT = Lo.getValueType();
+ // If the upper half is all sign bits, then we can perform the ABS on the
+ // lower half and zero-extend.
+ if (DAG.ComputeNumSignBits(N0) > NVT.getScalarSizeInBits()) {
+ Lo = DAG.getNode(ISD::ABS, dl, NVT, Lo);
+ Hi = DAG.getConstant(0, dl, NVT);
+ return;
+ }
+
// If we have SUBCARRY, use the expanded form of the sra+xor+sub sequence we
// use in LegalizeDAG. The SUB part of the expansion is based on
// ExpandIntRes_ADDSUB which also uses SUBCARRY/USUBO after checking that
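As a concrete illustration of the ABS early-out just added: an i64 abs whose operand is known to fit in i32, e.g. abs(-5), becomes Lo = abs(0xFFFFFFFB) = 5 on i32 with Hi = 0; even abs(-2^31) works out, because the wrapped i32 result 0x80000000 zero-extends to the correct i64 value 2^31.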
@@ -3364,15 +3392,15 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
Hi = DAG.getConstant(0, dl, NVT);
}
-void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo,
+void DAGTypeLegalizer::ExpandIntRes_GET_ROUNDING(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned NBitWidth = NVT.getSizeInBits();
- Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, {NVT, MVT::Other}, N->getOperand(0));
+ Lo = DAG.getNode(ISD::GET_ROUNDING, dl, {NVT, MVT::Other}, N->getOperand(0));
SDValue Chain = Lo.getValue(1);
- // The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS
+ // The high part is the sign of Lo, as -1 is a valid value for GET_ROUNDING
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
DAG.getShiftAmountConstant(NBitWidth - 1, NVT, dl));
@@ -3450,17 +3478,57 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo,
SplitInteger(Res, Lo, Hi);
}
-void DAGTypeLegalizer::ExpandIntRes_LLROUND_LLRINT(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
- SDValue Op = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
+void DAGTypeLegalizer::ExpandIntRes_XROUND_XRINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
assert(getTypeAction(Op.getValueType()) != TargetLowering::TypePromoteFloat &&
"Input type needs to be promoted!");
EVT VT = Op.getValueType();
+ if (VT == MVT::f16) {
+ VT = MVT::f32;
+ // Extend to f32.
+ if (IsStrict) {
+ Op = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, { VT, MVT::Other }, {Chain, Op});
+ Chain = Op.getValue(1);
+ } else {
+ Op = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op);
+ }
+ }
+
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- if (N->getOpcode() == ISD::LLROUND ||
+ if (N->getOpcode() == ISD::LROUND ||
+ N->getOpcode() == ISD::STRICT_LROUND) {
+ if (VT == MVT::f32)
+ LC = RTLIB::LROUND_F32;
+ else if (VT == MVT::f64)
+ LC = RTLIB::LROUND_F64;
+ else if (VT == MVT::f80)
+ LC = RTLIB::LROUND_F80;
+ else if (VT == MVT::f128)
+ LC = RTLIB::LROUND_F128;
+ else if (VT == MVT::ppcf128)
+ LC = RTLIB::LROUND_PPCF128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected lround input type!");
+ } else if (N->getOpcode() == ISD::LRINT ||
+ N->getOpcode() == ISD::STRICT_LRINT) {
+ if (VT == MVT::f32)
+ LC = RTLIB::LRINT_F32;
+ else if (VT == MVT::f64)
+ LC = RTLIB::LRINT_F64;
+ else if (VT == MVT::f80)
+ LC = RTLIB::LRINT_F80;
+ else if (VT == MVT::f128)
+ LC = RTLIB::LRINT_F128;
+ else if (VT == MVT::ppcf128)
+ LC = RTLIB::LRINT_PPCF128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected lrint input type!");
+ } else if (N->getOpcode() == ISD::LLROUND ||
N->getOpcode() == ISD::STRICT_LLROUND) {
if (VT == MVT::f32)
LC = RTLIB::LLROUND_F32;
@@ -3489,9 +3557,7 @@ void DAGTypeLegalizer::ExpandIntRes_LLROUND_LLRINT(SDNode *N, SDValue &Lo,
} else
llvm_unreachable("Unexpected opcode!");
- SDLoc dl(N);
EVT RetVT = N->getValueType(0);
- SDValue Chain = N->isStrictFPOpcode() ? N->getOperand(0) : SDValue();
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
@@ -4046,70 +4112,6 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
ReplaceValueWith(SDValue(Node, 1), Ovf);
}
-// Emit a call to __udivei4 and friends which require
-// the arguments be based on the stack
-// and extra argument that contains the number of bits of the operands.
-// Returns the result of the call operation.
-static SDValue ExpandExtIntRes_DIVREM(const TargetLowering &TLI,
- const RTLIB::Libcall &LC,
- SelectionDAG &DAG, SDNode *N,
- const SDLoc &DL, const EVT &VT) {
-
- SDValue InChain = DAG.getEntryNode();
-
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
-
- // The signature of __udivei4 is
- // void __udivei4(unsigned int *quo, unsigned int *a, unsigned int *b,
- // unsigned int bits)
- EVT ArgVT = N->op_begin()->getValueType();
- assert(ArgVT.isInteger() && ArgVT.getSizeInBits() > 128 &&
- "Unexpected argument type for lowering");
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
-
- SDValue Output = DAG.CreateStackTemporary(ArgVT);
- Entry.Node = Output;
- Entry.Ty = ArgTy->getPointerTo();
- Entry.IsSExt = false;
- Entry.IsZExt = false;
- Args.push_back(Entry);
-
- for (const llvm::SDUse &Op : N->ops()) {
- SDValue StackPtr = DAG.CreateStackTemporary(ArgVT);
- InChain = DAG.getStore(InChain, DL, Op, StackPtr, MachinePointerInfo());
- Entry.Node = StackPtr;
- Entry.Ty = ArgTy->getPointerTo();
- Entry.IsSExt = false;
- Entry.IsZExt = false;
- Args.push_back(Entry);
- }
-
- int Bits = N->getOperand(0)
- .getValueType()
- .getTypeForEVT(*DAG.getContext())
- ->getIntegerBitWidth();
- Entry.Node = DAG.getConstant(Bits, DL, TLI.getPointerTy(DAG.getDataLayout()));
- Entry.Ty = Type::getInt32Ty(*DAG.getContext());
- Entry.IsSExt = false;
- Entry.IsZExt = true;
- Args.push_back(Entry);
-
- SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy(DAG.getDataLayout()));
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(DL)
- .setChain(InChain)
- .setLibCallee(TLI.getLibcallCallingConv(LC),
- Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args))
- .setDiscardResult();
-
- SDValue Chain = TLI.LowerCallTo(CLI).second;
-
- return DAG.getLoad(ArgVT, DL, Chain, Output, MachinePointerInfo());
-}
-
void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
@@ -4131,14 +4133,6 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
LC = RTLIB::SDIV_I64;
else if (VT == MVT::i128)
LC = RTLIB::SDIV_I128;
-
- else {
- SDValue Result =
- ExpandExtIntRes_DIVREM(TLI, RTLIB::SDIV_IEXT, DAG, N, dl, VT);
- SplitInteger(Result, Lo, Hi);
- return;
- }
-
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -4146,6 +4140,111 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_ShiftThroughStack(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ SDValue Shiftee = N->getOperand(0);
+ EVT VT = Shiftee.getValueType();
+ SDValue ShAmt = N->getOperand(1);
+ EVT ShAmtVT = ShAmt.getValueType();
+
+ // This legalization is optimal when the shift is by a multiple of byte width,
+ // %x * 8 <-> %x << 3 so 3 low bits should be known zero.
+ bool ShiftByByteMultiple =
+ DAG.computeKnownBits(ShAmt).countMinTrailingZeros() >= 3;
+
+ // If we can't do it as one step, we'll have two uses of shift amount,
+ // and thus must freeze it.
+ if (!ShiftByByteMultiple)
+ ShAmt = DAG.getFreeze(ShAmt);
+
+ unsigned VTBitWidth = VT.getScalarSizeInBits();
+ assert(VTBitWidth % 8 == 0 && "Shifting a not byte multiple value?");
+ unsigned VTByteWidth = VTBitWidth / 8;
+ assert(isPowerOf2_32(VTByteWidth) &&
+ "Shiftee type size is not a power of two!");
+ unsigned StackSlotByteWidth = 2 * VTByteWidth;
+ unsigned StackSlotBitWidth = 8 * StackSlotByteWidth;
+ EVT StackSlotVT = EVT::getIntegerVT(*DAG.getContext(), StackSlotBitWidth);
+
+ // Get a temporary stack slot 2x the width of our VT.
+ // FIXME: reuse stack slots?
+ // FIXME: should we be more picky about alignment?
+ Align StackSlotAlignment(1);
+ SDValue StackPtr = DAG.CreateStackTemporary(
+ TypeSize::getFixed(StackSlotByteWidth), StackSlotAlignment);
+ EVT PtrTy = StackPtr.getValueType();
+ SDValue Ch = DAG.getEntryNode();
+
+ MachinePointerInfo StackPtrInfo = MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(),
+ cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex());
+
+ // Extend the value that is being shifted to the entire stack slot's width.
+ SDValue Init;
+ if (N->getOpcode() != ISD::SHL) {
+ unsigned WideningOpc =
+ N->getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ Init = DAG.getNode(WideningOpc, dl, StackSlotVT, Shiftee);
+ } else {
+ // For left-shifts, pad the Shiftee's LSB with zeros to twice its width.
+ SDValue AllZeros = DAG.getConstant(0, dl, VT);
+ Init = DAG.getNode(ISD::BUILD_PAIR, dl, StackSlotVT, AllZeros, Shiftee);
+ }
+ // And spill it into the stack slot.
+ Ch = DAG.getStore(Ch, dl, Init, StackPtr, StackPtrInfo, StackSlotAlignment);
+
+ // Now, compute the full-byte offset into stack slot from where we can load.
+ // We have shift amount, which is in bits, but in multiples of byte.
+ // So just divide by CHAR_BIT.
+ SDNodeFlags Flags;
+ if (ShiftByByteMultiple)
+ Flags.setExact(true);
+ SDValue ByteOffset = DAG.getNode(ISD::SRL, dl, ShAmtVT, ShAmt,
+ DAG.getConstant(3, dl, ShAmtVT), Flags);
+ // And clamp it, because OOB load is an immediate UB,
+ // while shift overflow would have *just* been poison.
+ ByteOffset = DAG.getNode(ISD::AND, dl, ShAmtVT, ByteOffset,
+ DAG.getConstant(VTByteWidth - 1, dl, ShAmtVT));
+ // We have exactly two strategies on indexing into stack slot here:
+ // 1. upwards starting from the beginning of the slot
+ // 2. downwards starting from the middle of the slot
+ // On a little-endian machine, we pick 1. for right shifts and 2. for left shifts,
+ // and vice versa on a big-endian machine.
+ bool WillIndexUpwards = N->getOpcode() != ISD::SHL;
+ if (DAG.getDataLayout().isBigEndian())
+ WillIndexUpwards = !WillIndexUpwards;
+
+ SDValue AdjStackPtr;
+ if (WillIndexUpwards) {
+ AdjStackPtr = StackPtr;
+ } else {
+ AdjStackPtr = DAG.getMemBasePlusOffset(
+ StackPtr, DAG.getConstant(VTByteWidth, dl, PtrTy), dl);
+ ByteOffset = DAG.getNegative(ByteOffset, dl, ShAmtVT);
+ }
+
+ // Get the pointer somewhere into the stack slot from which we need to load.
+ ByteOffset = DAG.getSExtOrTrunc(ByteOffset, dl, PtrTy);
+ AdjStackPtr = DAG.getMemBasePlusOffset(AdjStackPtr, ByteOffset, dl);
+
+ // And load it! While the load is not legal, legalizing it is obvious.
+ SDValue Res = DAG.getLoad(
+ VT, dl, Ch, AdjStackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), Align(1));
+ // We've performed the shift by a CHAR_BIT * [_ShAmt / CHAR_BIT_]
+
+ // If we may still have a less-than-CHAR_BIT to shift by, do so now.
+ if (!ShiftByByteMultiple) {
+ SDValue ShAmtRem = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
+ DAG.getConstant(7, dl, ShAmtVT));
+ Res = DAG.getNode(N->getOpcode(), dl, VT, Res, ShAmtRem);
+ }
+
+ // Finally, split the computed value.
+ SplitInteger(Res, Lo, Hi);
+}
+
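A minimal host-side sketch of what ExpandIntRes_ShiftThroughStack emits, shown for the logical-right-shift case on a little-endian target; the function name is hypothetical and a plain byte array stands in for the real stack slot:

  #include <cstdint>
  #include <cstring>

  // SRL of a 64-bit value via a 2x-wide slot: zero-extend the shiftee into
  // the low half, reload at a byte offset derived from the shift amount,
  // then finish with a sub-byte shift.
  static uint64_t srlThroughStack(uint64_t Shiftee, unsigned ShAmt) {
    uint8_t Slot[16] = {0};                            // 2x the shiftee's width
    std::memcpy(Slot, &Shiftee, sizeof(Shiftee));      // zero-extend + spill
    unsigned ByteOffset = (ShAmt / 8) & 7;             // SRL by 3, AND-clamp in bounds
    uint64_t Res;
    std::memcpy(&Res, Slot + ByteOffset, sizeof(Res)); // the (possibly unaligned) reload
    return Res >> (ShAmt % 8);                         // residual 0..7 bit shift
  }

SRA differs only in sign-extending into the high half of the slot, and SHL places the shiftee in the high half and indexes downward from the middle, as the endianness logic above selects.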
void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
@@ -4181,7 +4280,24 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
(Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
Action == TargetLowering::Custom;
- if (LegalOrCustom && TLI.shouldExpandShift(DAG, N)) {
+ unsigned ExpansionFactor = 1;
+ // That VT->NVT expansion is one step. But will we re-expand NVT?
+ for (EVT TmpVT = NVT;;) {
+ EVT NewTMPVT = TLI.getTypeToTransformTo(*DAG.getContext(), TmpVT);
+ if (NewTMPVT == TmpVT)
+ break;
+ TmpVT = NewTMPVT;
+ ++ExpansionFactor;
+ }
+
+ TargetLowering::ShiftLegalizationStrategy S =
+ TLI.preferredShiftLegalizationStrategy(DAG, N, ExpansionFactor);
+
+ if (S == TargetLowering::ShiftLegalizationStrategy::ExpandThroughStack)
+ return ExpandIntRes_ShiftThroughStack(N, Lo, Hi);
+
+ if (LegalOrCustom &&
+ S != TargetLowering::ShiftLegalizationStrategy::LowerToLibcall) {
// Expand the subcomponents.
SDValue LHSL, LHSH;
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
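For example, expanding an i256 shift on a target whose widest legal integer type is i32: NVT is i128, which would itself re-expand through i64 down to i32, so ExpansionFactor is 3; preferredShiftLegalizationStrategy can weigh that cost when choosing between the regular split expansion, the stack-slot lowering above, and a libcall.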
@@ -4330,14 +4446,6 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
LC = RTLIB::SREM_I64;
else if (VT == MVT::i128)
LC = RTLIB::SREM_I128;
-
- else {
- SDValue Result =
- ExpandExtIntRes_DIVREM(TLI, RTLIB::SREM_IEXT, DAG, N, dl, VT);
- SplitInteger(Result, Lo, Hi);
- return;
- }
-
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -4428,7 +4536,10 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
else if (VT == MVT::i128)
LC = RTLIB::MULO_I128;
- if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
+ // If we don't have the libcall or if the function we are compiling is the
+ // implementation of the expected libcall (avoid inf-loop), expand inline.
+ if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC) ||
+ TLI.getLibcallName(LC) == DAG.getMachineFunction().getName()) {
// FIXME: This is not an optimal expansion, but better than crashing.
EVT WideVT =
EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
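The extra name check matters when the libcall implementation itself is being compiled: for instance, building compiler-rt's __mulodi4 or __muloti4 (the RTLIB::MULO_I64 / MULO_I128 routines) must not emit a call to that same symbol, or the function would recurse into itself, so the node is expanded inline instead.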
@@ -4504,6 +4615,22 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
return;
}
+ // Try to expand UDIV by constant.
+ if (isa<ConstantSDNode>(N->getOperand(1))) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ // Only if the new type is legal.
+ if (isTypeLegal(NVT)) {
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+ SmallVector<SDValue> Result;
+ if (TLI.expandDIVREMByConstant(N, Result, NVT, DAG, InL, InH)) {
+ Lo = Result[0];
+ Hi = Result[1];
+ return;
+ }
+ }
+ }
+
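The expandDIVREMByConstant path used here (and for UREM below) works entirely in the narrow half-width type. One way such an expansion can be understood, as plain arithmetic rather than the exact algorithm: with X = Hi*2^32 + Lo and a constant divisor K, X mod K = ((Hi mod K)*(2^32 mod K) + Lo mod K) mod K, so whenever 2^32 mod K == 1 (K = 3, 5, 17, ...) the remainder collapses to (Hi + Lo) mod K computed on the halves (with care for the carry), and the quotient can then be derived from X minus that remainder.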
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i16)
LC = RTLIB::UDIV_I16;
@@ -4513,14 +4640,6 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
LC = RTLIB::UDIV_I64;
else if (VT == MVT::i128)
LC = RTLIB::UDIV_I128;
-
- else {
- SDValue Result =
- ExpandExtIntRes_DIVREM(TLI, RTLIB::UDIV_IEXT, DAG, N, dl, VT);
- SplitInteger(Result, Lo, Hi);
- return;
- }
-
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -4539,6 +4658,22 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
return;
}
+ // Try to expand UREM by constant.
+ if (isa<ConstantSDNode>(N->getOperand(1))) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ // Only if the new type is legal.
+ if (isTypeLegal(NVT)) {
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+ SmallVector<SDValue> Result;
+ if (TLI.expandDIVREMByConstant(N, Result, NVT, DAG, InL, InH)) {
+ Lo = Result[0];
+ Hi = Result[1];
+ return;
+ }
+ }
+ }
+
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i16)
LC = RTLIB::UREM_I16;
@@ -4548,14 +4683,6 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
LC = RTLIB::UREM_I64;
else if (VT == MVT::i128)
LC = RTLIB::UREM_I128;
-
- else {
- SDValue Result =
- ExpandExtIntRes_DIVREM(TLI, RTLIB::UREM_IEXT, DAG, N, dl, VT);
- SplitInteger(Result, Lo, Hi);
- return;
- }
-
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -5294,7 +5421,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
return DAG.getVectorShuffle(OutVT, dl, V0, V1, NewMask);
}
-
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
@@ -5352,7 +5478,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_STEP_VECTOR(SDNode *N) {
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isScalableVector() &&
"Type must be promoted to a scalable vector type");
- APInt StepVal = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
+ const APInt &StepVal = N->getConstantOperandAPInt(0);
return DAG.getStepVector(dl, NOutVT,
StepVal.sext(NOutVT.getScalarSizeInBits()));
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 8fe9a83b9c3d..5e0349593139 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -722,9 +722,13 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
}
void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
- assert(Result.getValueType() ==
- TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+#ifndef NDEBUG
+ EVT VT = Result.getValueType();
+ LLVMContext &Ctx = *DAG.getContext();
+ assert((VT == EVT::getIntegerVT(Ctx, 80) ||
+ VT == TLI.getTypeToTransformTo(Ctx, Op.getValueType())) &&
"Invalid type for softened float");
+#endif
AnalyzeNewValue(Result);
auto &OpIdEntry = SoftenedFloats[getTableId(Op)];
@@ -759,7 +763,7 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
// a constant i8 operand.
// We don't currently support the scalarization of scalable vector types.
- assert(Result.getValueSizeInBits().getFixedSize() >=
+ assert(Result.getValueSizeInBits().getFixedValue() >=
Op.getScalarValueSizeInBits() &&
"Invalid type for scalarized vector");
AnalyzeNewValue(Result);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 6696b79cf885..b97e44a01319 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -324,7 +324,7 @@ private:
SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
SDValue PromoteIntRes_FP_TO_XINT_SAT(SDNode *N);
- SDValue PromoteIntRes_FP_TO_FP16(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_FP16_BF16(SDNode *N);
SDValue PromoteIntRes_FREEZE(SDNode *N);
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
@@ -354,7 +354,7 @@ private:
SDValue PromoteIntRes_ADDSUBSHLSAT(SDNode *N);
SDValue PromoteIntRes_MULFIX(SDNode *N);
SDValue PromoteIntRes_DIVFIX(SDNode *N);
- SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
+ SDValue PromoteIntRes_GET_ROUNDING(SDNode *N);
SDValue PromoteIntRes_VECREDUCE(SDNode *N);
SDValue PromoteIntRes_VP_REDUCE(SDNode *N);
SDValue PromoteIntRes_ABS(SDNode *N);
@@ -437,11 +437,11 @@ private:
void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_GET_ROUNDING (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_XINT_SAT (SDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandIntRes_LLROUND_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_XROUND_XRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -457,6 +457,7 @@ private:
void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ShiftThroughStack (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_MINMAX (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -862,6 +863,8 @@ private:
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo,
+ SDValue &Hi);
void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_Gather(MemSDNode *VPGT, SDValue &Lo, SDValue &Hi,
bool SplitSETCC = false);
@@ -891,6 +894,7 @@ private:
SDValue SplitVecOp_ExtVecInRegOp(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_VP_STRIDED_STORE(VPStridedStoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo);
SDValue SplitVecOp_Gather(MemSDNode *MGT, unsigned OpNo);
@@ -947,6 +951,7 @@ private:
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
+ SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
SDValue WidenVecRes_VP_GATHER(VPGatherSDNode* N);
@@ -958,6 +963,7 @@ private:
SDValue WidenVecRes_STRICT_FSETCC(SDNode* N);
SDValue WidenVecRes_UNDEF(SDNode *N);
SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
+ SDValue WidenVecRes_VECTOR_REVERSE(SDNode *N);
SDValue WidenVecRes_Ternary(SDNode *N);
SDValue WidenVecRes_Binary(SDNode *N);
@@ -984,6 +990,7 @@ private:
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
SDValue WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo);
+ SDValue WidenVecOp_VP_STRIDED_STORE(SDNode *N, unsigned OpNo);
SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index c6885677d644..21b5255c8f72 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -197,8 +197,7 @@ void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
GetExpandedOp(N->getOperand(0), Lo, Hi);
- SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ?
- Hi : Lo;
+ SDValue Part = N->getConstantOperandVal(1) ? Hi : Lo;
assert(Part.getValueType() == N->getValueType(0) &&
"Type twice as big as expanded type not itself expanded!");
@@ -209,7 +208,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue OldVec = N->getOperand(0);
- unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+ ElementCount OldEltCount = OldVec.getValueType().getVectorElementCount();
EVT OldEltVT = OldVec.getValueType().getVectorElementType();
SDLoc dl(N);
@@ -223,14 +222,13 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// the input vector. If so, extend the elements of the input vector to the
// same bitwidth as the result before expanding.
assert(OldEltVT.bitsLT(OldVT) && "Result type smaller than element type!");
- EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldElts);
+ EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldEltCount);
OldVec = DAG.getNode(ISD::ANY_EXTEND, dl, NVecVT, N->getOperand(0));
}
- SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
- EVT::getVectorVT(*DAG.getContext(),
- NewVT, 2*OldElts),
- OldVec);
+ SDValue NewVec = DAG.getNode(
+ ISD::BITCAST, dl,
+ EVT::getVectorVT(*DAG.getContext(), NewVT, OldEltCount * 2), OldVec);
// Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
SDValue Idx = N->getOperand(1);
@@ -359,8 +357,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
SmallVector<SDValue, 8> Ops;
IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType());
- SDValue Vec =
- DAG.getBuildVector(NVT, dl, makeArrayRef(Ops.data(), NumElts));
+ SDValue Vec = DAG.getBuildVector(NVT, dl, ArrayRef(Ops.data(), NumElts));
return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
}
@@ -403,7 +400,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
SDValue Lo, Hi;
GetExpandedOp(N->getOperand(0), Lo, Hi);
- return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo;
+ return N->getConstantOperandVal(1) ? Hi : Lo;
}
SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index f5a1eae1e7fe..e245b3cb4c6d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -132,6 +132,7 @@ class VectorLegalizer {
SDValue ExpandVSELECT(SDNode *Node);
SDValue ExpandVP_SELECT(SDNode *Node);
SDValue ExpandVP_MERGE(SDNode *Node);
+ SDValue ExpandVP_REM(SDNode *Node);
SDValue ExpandSELECT(SDNode *Node);
std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
SDValue ExpandStore(SDNode *N);
@@ -492,7 +493,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
if (LowerOperationWrapper(Node, ResultVals))
break;
LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case TargetLowering::Expand:
LLVM_DEBUG(dbgs() << "Expanding\n");
Expand(Node, ResultVals);
@@ -594,7 +595,8 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
(VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
- Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res, DAG.getIntPtrConstant(0, dl));
+ Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
else
Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);
@@ -728,12 +730,22 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::BSWAP:
Results.push_back(ExpandBSWAP(Node));
return;
+ case ISD::VP_BSWAP:
+ Results.push_back(TLI.expandVPBSWAP(Node, DAG));
+ return;
case ISD::VSELECT:
Results.push_back(ExpandVSELECT(Node));
return;
case ISD::VP_SELECT:
Results.push_back(ExpandVP_SELECT(Node));
return;
+ case ISD::VP_SREM:
+ case ISD::VP_UREM:
+ if (SDValue Expanded = ExpandVP_REM(Node)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::SELECT:
Results.push_back(ExpandSELECT(Node));
return;
@@ -776,12 +788,24 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::BITREVERSE:
ExpandBITREVERSE(Node, Results);
return;
+ case ISD::VP_BITREVERSE:
+ if (SDValue Expanded = TLI.expandVPBITREVERSE(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::CTPOP:
if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) {
Results.push_back(Expanded);
return;
}
break;
+ case ISD::VP_CTPOP:
+ if (SDValue Expanded = TLI.expandVPCTPOP(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) {
@@ -789,6 +813,13 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::VP_CTLZ:
+ case ISD::VP_CTLZ_ZERO_UNDEF:
+ if (SDValue Expanded = TLI.expandVPCTLZ(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) {
@@ -796,8 +827,17 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::VP_CTTZ:
+ case ISD::VP_CTTZ_ZERO_UNDEF:
+ if (SDValue Expanded = TLI.expandVPCTTZ(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::FSHL:
+ case ISD::VP_FSHL:
case ISD::FSHR:
+ case ISD::VP_FSHR:
if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) {
Results.push_back(Expanded);
return;
@@ -847,6 +887,13 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::USHLSAT:
+ case ISD::SSHLSAT:
+ if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
     // Expand the fptosisat if it is scalable to prevent it from unrolling below.
@@ -954,10 +1001,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
DAG.getConstant(0, DL, BitTy));
// Broadcast the mask so that the entire vector is all one or all zero.
- if (VT.isFixedLengthVector())
- Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
- else
- Mask = DAG.getSplatVector(MaskTy, DL, Mask);
+ Mask = DAG.getSplat(MaskTy, DL, Mask);
// Bitcast the operands to be the same type as the mask.
// This is needed when we select between FP types because
@@ -1300,8 +1344,7 @@ SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
return DAG.UnrollVectorOp(Node);
SDValue StepVec = DAG.getStepVector(DL, EVLVecVT);
- SDValue SplatEVL = IsFixedLen ? DAG.getSplatBuildVector(EVLVecVT, DL, EVL)
- : DAG.getSplatVector(EVLVecVT, DL, EVL);
+ SDValue SplatEVL = DAG.getSplat(EVLVecVT, DL, EVL);
SDValue EVLMask =
DAG.getSetCC(DL, MaskVT, StepVec, SplatEVL, ISD::CondCode::SETULT);
@@ -1309,6 +1352,30 @@ SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
return DAG.getSelect(DL, Node->getValueType(0), FullMask, Op1, Op2);
}
+SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) {
+ // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB.
+ EVT VT = Node->getValueType(0);
+
+ unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV;
+
+ if (!TLI.isOperationLegalOrCustom(DivOpc, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::VP_MUL, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::VP_SUB, VT))
+ return SDValue();
+
+ SDLoc DL(Node);
+
+ SDValue Dividend = Node->getOperand(0);
+ SDValue Divisor = Node->getOperand(1);
+ SDValue Mask = Node->getOperand(2);
+ SDValue EVL = Node->getOperand(3);
+
+ // X % Y -> X-X/Y*Y
+ SDValue Div = DAG.getNode(DivOpc, DL, VT, Dividend, Divisor, Mask, EVL);
+ SDValue Mul = DAG.getNode(ISD::VP_MUL, DL, VT, Divisor, Div, Mask, EVL);
+ return DAG.getNode(ISD::VP_SUB, DL, VT, Dividend, Mul, Mask, EVL);
+}
+
void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
// Attempt to expand using TargetLowering.
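[Editorial note] The new ExpandVP_REM lowers a predicated remainder through the identity X % Y == X - (X / Y) * Y, and it is only attempted when VP_SDIV/VP_UDIV, VP_MUL and VP_SUB are all legal or custom for the type, since every step must run under the same mask and explicit vector length. A standalone scalar-loop model of the expansion; this is illustration only, none of the names below are LLVM APIs, and what disabled lanes hold in the real VP nodes is unspecified (the model just leaves the input value there):

#include <cassert>
#include <cstdint>
#include <vector>

// Reference model of the VP_UREM expansion: rem = x - (x / y) * y,
// computed only for lanes below EVL whose mask bit is set.
std::vector<uint32_t> vpURem(const std::vector<uint32_t> &X,
                             const std::vector<uint32_t> &Y,
                             const std::vector<bool> &Mask, unsigned EVL) {
  assert(X.size() == Y.size() && X.size() == Mask.size());
  std::vector<uint32_t> R(X); // disabled lanes pass through unchanged (model choice)
  for (unsigned I = 0; I < EVL; ++I) {
    if (!Mask[I])
      continue;
    uint32_t Div = X[I] / Y[I]; // VP_UDIV
    uint32_t Mul = Div * Y[I];  // VP_MUL
    R[I] = X[I] - Mul;          // VP_SUB
  }
  return R;
}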
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 143abc08eeea..af5ea1ce5f45 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -27,6 +27,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Support/raw_ostream.h"
+#include <numeric>
+
using namespace llvm;
#define DEBUG_TYPE "legalize-types"
@@ -975,6 +977,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_LOAD:
SplitVecRes_VP_LOAD(cast<VPLoadSDNode>(N), Lo, Hi);
break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+ SplitVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N), Lo, Hi);
+ break;
case ISD::MLOAD:
SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi);
break;
@@ -1006,23 +1011,34 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
break;
case ISD::ABS:
+ case ISD::VP_ABS:
case ISD::BITREVERSE:
+ case ISD::VP_BITREVERSE:
case ISD::BSWAP:
+ case ISD::VP_BSWAP:
case ISD::CTLZ:
+ case ISD::VP_CTLZ:
case ISD::CTTZ:
+ case ISD::VP_CTTZ:
case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::VP_CTLZ_ZERO_UNDEF:
case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::VP_CTTZ_ZERO_UNDEF:
case ISD::CTPOP:
- case ISD::FABS:
+ case ISD::VP_CTPOP:
+ case ISD::FABS: case ISD::VP_FABS:
case ISD::FCEIL:
+ case ISD::VP_FCEIL:
case ISD::FCOS:
case ISD::FEXP:
case ISD::FEXP2:
case ISD::FFLOOR:
+ case ISD::VP_FFLOOR:
case ISD::FLOG:
case ISD::FLOG10:
case ISD::FLOG2:
case ISD::FNEARBYINT:
+ case ISD::VP_FNEARBYINT:
case ISD::FNEG: case ISD::VP_FNEG:
case ISD::FREEZE:
case ISD::ARITH_FENCE:
@@ -1031,21 +1047,25 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FP_ROUND:
case ISD::VP_FP_ROUND:
case ISD::FP_TO_SINT:
- case ISD::VP_FPTOSI:
+ case ISD::VP_FP_TO_SINT:
case ISD::FP_TO_UINT:
- case ISD::VP_FPTOUI:
+ case ISD::VP_FP_TO_UINT:
case ISD::FRINT:
+ case ISD::VP_FRINT:
case ISD::FROUND:
+ case ISD::VP_FROUND:
case ISD::FROUNDEVEN:
+ case ISD::VP_FROUNDEVEN:
case ISD::FSIN:
- case ISD::FSQRT:
+ case ISD::FSQRT: case ISD::VP_SQRT:
case ISD::FTRUNC:
+ case ISD::VP_FROUNDTOZERO:
case ISD::SINT_TO_FP:
- case ISD::VP_SITOFP:
+ case ISD::VP_SINT_TO_FP:
case ISD::TRUNCATE:
case ISD::VP_TRUNCATE:
case ISD::UINT_TO_FP:
- case ISD::VP_UITOFP:
+ case ISD::VP_UINT_TO_FP:
case ISD::FCANONICALIZE:
SplitVecRes_UnaryOp(N, Lo, Hi);
break;
@@ -1066,8 +1086,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FADD: case ISD::VP_FADD:
case ISD::FSUB: case ISD::VP_FSUB:
case ISD::FMUL: case ISD::VP_FMUL:
- case ISD::FMINNUM:
- case ISD::FMAXNUM:
+ case ISD::FMINNUM: case ISD::VP_FMINNUM:
+ case ISD::FMAXNUM: case ISD::VP_FMAXNUM:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
case ISD::SDIV: case ISD::VP_SDIV:
@@ -1083,10 +1103,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UREM: case ISD::VP_UREM:
case ISD::SREM: case ISD::VP_SREM:
case ISD::FREM: case ISD::VP_FREM:
- case ISD::SMIN:
- case ISD::SMAX:
- case ISD::UMIN:
- case ISD::UMAX:
+ case ISD::SMIN: case ISD::VP_SMIN:
+ case ISD::SMAX: case ISD::VP_SMAX:
+ case ISD::UMIN: case ISD::VP_UMIN:
+ case ISD::UMAX: case ISD::VP_UMAX:
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
@@ -1095,11 +1115,14 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::USHLSAT:
case ISD::ROTL:
case ISD::ROTR:
+ case ISD::VP_FCOPYSIGN:
SplitVecRes_BinOp(N, Lo, Hi);
break;
case ISD::FMA: case ISD::VP_FMA:
case ISD::FSHL:
+ case ISD::VP_FSHL:
case ISD::FSHR:
+ case ISD::VP_FSHR:
SplitVecRes_TernaryOp(N, Lo, Hi);
break;
@@ -1143,13 +1166,13 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
MachinePointerInfo &MPI, SDValue &Ptr,
uint64_t *ScaledOffset) {
SDLoc DL(N);
- unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinSize() / 8;
+ unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinValue() / 8;
if (MemVT.isScalableVector()) {
SDNodeFlags Flags;
SDValue BytesIncrement = DAG.getVScale(
DL, Ptr.getValueType(),
- APInt(Ptr.getValueSizeInBits().getFixedSize(), IncrementSize));
+ APInt(Ptr.getValueSizeInBits().getFixedValue(), IncrementSize));
MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
Flags.setNoUnsignedWrap(true);
if (ScaledOffset)
@@ -1465,7 +1488,11 @@ void DAGTypeLegalizer::SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo,
SDLoc DL(N);
SDValue ArgLo, ArgHi;
SDValue Test = N->getOperand(1);
- GetSplitVector(N->getOperand(0), ArgLo, ArgHi);
+ SDValue FpValue = N->getOperand(0);
+ if (getTypeAction(FpValue.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(FpValue, ArgLo, ArgHi);
+ else
+ std::tie(ArgLo, ArgHi) = DAG.SplitVector(FpValue, SDLoc(FpValue));
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
@@ -1900,7 +1927,7 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo,
MPI = MachinePointerInfo(LD->getPointerInfo().getAddrSpace());
else
MPI = LD->getPointerInfo().getWithOffset(
- LoMemVT.getStoreSize().getFixedSize());
+ LoMemVT.getStoreSize().getFixedValue());
MMO = DAG.getMachineFunction().getMachineMemOperand(
MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment,
@@ -1921,6 +1948,87 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo,
ReplaceValueWith(SDValue(LD, 1), Ch);
}
+void DAGTypeLegalizer::SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD,
+ SDValue &Lo, SDValue &Hi) {
+ assert(SLD->isUnindexed() &&
+ "Indexed VP strided load during type legalization!");
+ assert(SLD->getOffset().isUndef() &&
+ "Unexpected indexed variable-length load offset");
+
+ SDLoc DL(SLD);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(SLD->getValueType(0));
+
+ EVT LoMemVT, HiMemVT;
+ bool HiIsEmpty = false;
+ std::tie(LoMemVT, HiMemVT) =
+ DAG.GetDependentSplitDestVTs(SLD->getMemoryVT(), LoVT, &HiIsEmpty);
+
+ SDValue Mask = SLD->getMask();
+ SDValue LoMask, HiMask;
+ if (Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), LoMask, HiMask);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, LoMask, HiMask);
+ else
+ std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
+ }
+
+ SDValue LoEVL, HiEVL;
+ std::tie(LoEVL, HiEVL) =
+ DAG.SplitEVL(SLD->getVectorLength(), SLD->getValueType(0), DL);
+
+ // Generate the low vp_strided_load
+ Lo = DAG.getStridedLoadVP(
+ SLD->getAddressingMode(), SLD->getExtensionType(), LoVT, DL,
+ SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(), SLD->getStride(),
+ LoMask, LoEVL, LoMemVT, SLD->getMemOperand(), SLD->isExpandingLoad());
+
+ if (HiIsEmpty) {
+ // The high vp_strided_load has zero storage size. We therefore simply set
+ // it to the low vp_strided_load and rely on subsequent removal from the
+ // chain.
+ Hi = Lo;
+ } else {
+ // Generate the high vp_strided_load.
+    // To calculate the high base address, add one stride's worth of bytes to
+    // the low base address for each element already loaded by the low part,
+    // that is: Ptr = Ptr + (LoEVL * Stride)
+ EVT PtrVT = SLD->getBasePtr().getValueType();
+ SDValue Increment =
+ DAG.getNode(ISD::MUL, DL, PtrVT, LoEVL,
+ DAG.getSExtOrTrunc(SLD->getStride(), DL, PtrVT));
+ SDValue Ptr =
+ DAG.getNode(ISD::ADD, DL, PtrVT, SLD->getBasePtr(), Increment);
+
+ Align Alignment = SLD->getOriginalAlign();
+ if (LoMemVT.isScalableVector())
+ Alignment = commonAlignment(
+ Alignment, LoMemVT.getSizeInBits().getKnownMinValue() / 8);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(SLD->getPointerInfo().getAddrSpace()),
+ MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment,
+ SLD->getAAInfo(), SLD->getRanges());
+
+ Hi = DAG.getStridedLoadVP(SLD->getAddressingMode(), SLD->getExtensionType(),
+ HiVT, DL, SLD->getChain(), Ptr, SLD->getOffset(),
+ SLD->getStride(), HiMask, HiEVL, HiMemVT, MMO,
+ SLD->isExpandingLoad());
+ }
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ SDValue Ch = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(SLD, 1), Ch);
+}
+
void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
SDValue &Lo, SDValue &Hi) {
assert(MLD->isUnindexed() && "Indexed masked load during type legalization!");
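[Editorial note] When a VP strided load is split, the high half has to start where the low half left off: LoEVL elements, each Stride bytes apart, so the second base pointer is Base + LoEVL * Stride (the stride is in bytes and may be negative, hence the sign-extension before the multiply). The same address computation reappears further below when VP strided stores are split. A small standalone model of the address math (illustration only, not part of the patch):

#include <cstdint>

// Model: addresses touched by a strided access of EVL elements. Splitting it
// into LoEVL + HiEVL elements visits the same addresses as long as the high
// half starts at Base + LoEVL * Stride.
uintptr_t laneAddress(uintptr_t Base, int64_t StrideBytes, unsigned Lane) {
  return Base + static_cast<int64_t>(Lane) * StrideBytes;
}

bool splitPreservesAddresses(uintptr_t Base, int64_t StrideBytes,
                             unsigned LoEVL, unsigned HiEVL) {
  uintptr_t HiBase = Base + static_cast<int64_t>(LoEVL) * StrideBytes;
  for (unsigned I = 0; I < HiEVL; ++I)
    if (laneAddress(HiBase, StrideBytes, I) !=
        laneAddress(Base, StrideBytes, LoEVL + I))
      return false;
  return true;
}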
@@ -1983,7 +2091,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
MPI = MachinePointerInfo(MLD->getPointerInfo().getAddrSpace());
else
MPI = MLD->getPointerInfo().getWithOffset(
- LoMemVT.getStoreSize().getFixedSize());
+ LoMemVT.getStoreSize().getFixedValue());
MMO = DAG.getMachineFunction().getMachineMemOperand(
MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment,
@@ -2286,13 +2394,13 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
// If Lo or Hi uses elements from at most two of the four input vectors, then
// express it as a vector shuffle of those two inputs. Otherwise extract the
// input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
- SmallVector<int> OrigMask(N->getMask().begin(), N->getMask().end());
+ SmallVector<int> OrigMask(N->getMask());
// Try to pack incoming shuffles/inputs.
auto &&TryPeekThroughShufflesInputs = [&Inputs, &NewVT, this, NewElts,
&DL](SmallVectorImpl<int> &Mask) {
// Check if all inputs are shuffles of the same operands or non-shuffles.
MapVector<std::pair<SDValue, SDValue>, SmallVector<unsigned>> ShufflesIdxs;
- for (unsigned Idx = 0; Idx < array_lengthof(Inputs); ++Idx) {
+ for (unsigned Idx = 0; Idx < std::size(Inputs); ++Idx) {
SDValue Input = Inputs[Idx];
auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Input.getNode());
if (!Shuffle ||
@@ -2339,7 +2447,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
ShufflesIdxs[std::make_pair(P.first.second, P.first.first)].clear();
}
// Check if any concat_vectors can be simplified.
- SmallBitVector UsedSubVector(2 * array_lengthof(Inputs));
+ SmallBitVector UsedSubVector(2 * std::size(Inputs));
for (int &Idx : Mask) {
if (Idx == UndefMaskElem)
continue;
@@ -2359,7 +2467,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
}
if (UsedSubVector.count() > 1) {
SmallVector<SmallVector<std::pair<unsigned, int>, 2>> Pairs;
- for (unsigned I = 0; I < array_lengthof(Inputs); ++I) {
+ for (unsigned I = 0; I < std::size(Inputs); ++I) {
if (UsedSubVector.test(2 * I) == UsedSubVector.test(2 * I + 1))
continue;
if (Pairs.empty() || Pairs.back().size() == 2)
@@ -2403,7 +2511,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
// Try to remove extra shuffles (except broadcasts) and shuffles with the
// reused operands.
Changed = false;
- for (unsigned I = 0; I < array_lengthof(Inputs); ++I) {
+ for (unsigned I = 0; I < std::size(Inputs); ++I) {
auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Inputs[I].getNode());
if (!Shuffle)
continue;
@@ -2495,15 +2603,15 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
NewElts](SmallVectorImpl<int> &Mask) {
SetVector<SDValue> UniqueInputs;
SetVector<SDValue> UniqueConstantInputs;
- for (unsigned I = 0; I < array_lengthof(Inputs); ++I) {
- if (IsConstant(Inputs[I]))
- UniqueConstantInputs.insert(Inputs[I]);
- else if (!Inputs[I].isUndef())
- UniqueInputs.insert(Inputs[I]);
+ for (const auto &I : Inputs) {
+ if (IsConstant(I))
+ UniqueConstantInputs.insert(I);
+ else if (!I.isUndef())
+ UniqueInputs.insert(I);
}
// Adjust mask in case of reused inputs. Also, need to insert constant
// inputs at first, otherwise it affects the final outcome.
- if (UniqueInputs.size() != array_lengthof(Inputs)) {
+ if (UniqueInputs.size() != std::size(Inputs)) {
auto &&UniqueVec = UniqueInputs.takeVector();
auto &&UniqueConstantVec = UniqueConstantInputs.takeVector();
unsigned ConstNum = UniqueConstantVec.size();
@@ -2541,8 +2649,8 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
// Build a shuffle mask for the output, discovering on the fly which
// input vectors to use as shuffle operands.
unsigned FirstMaskIdx = High * NewElts;
- SmallVector<int> Mask(NewElts * array_lengthof(Inputs), UndefMaskElem);
- copy(makeArrayRef(OrigMask).slice(FirstMaskIdx, NewElts), Mask.begin());
+ SmallVector<int> Mask(NewElts * std::size(Inputs), UndefMaskElem);
+ copy(ArrayRef(OrigMask).slice(FirstMaskIdx, NewElts), Mask.begin());
assert(!Output && "Expected default initialized initial value.");
TryPeekThroughShufflesInputs(Mask);
MakeUniqueInputs(Mask);
@@ -2561,7 +2669,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
return SecondIteration;
};
processShuffleMasks(
- Mask, array_lengthof(Inputs), array_lengthof(Inputs),
+ Mask, std::size(Inputs), std::size(Inputs),
/*NumOfUsedRegs=*/1,
[&Output, &DAG = DAG, NewVT]() { Output = DAG.getUNDEF(NewVT); },
[&Output, &DAG = DAG, NewVT, &DL, &Inputs,
@@ -2707,6 +2815,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VP_STORE:
Res = SplitVecOp_VP_STORE(cast<VPStoreSDNode>(N), OpNo);
break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ Res = SplitVecOp_VP_STRIDED_STORE(cast<VPStridedStoreSDNode>(N), OpNo);
+ break;
case ISD::MSTORE:
Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
break;
@@ -2725,6 +2836,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
+ case ISD::VP_SINT_TO_FP:
+ case ISD::VP_UINT_TO_FP:
if (N->getValueType(0).bitsLT(
N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType()))
Res = SplitVecOp_TruncateHelper(N);
@@ -2737,6 +2850,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::VP_FP_TO_SINT:
+ case ISD::VP_FP_TO_UINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::STRICT_FP_EXTEND:
@@ -2999,29 +3114,57 @@ SDValue DAGTypeLegalizer::SplitVecOp_INSERT_SUBVECTOR(SDNode *N,
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
// We know that the extracted result type is legal.
EVT SubVT = N->getValueType(0);
-
SDValue Idx = N->getOperand(1);
SDLoc dl(N);
SDValue Lo, Hi;
- if (SubVT.isScalableVector() !=
- N->getOperand(0).getValueType().isScalableVector())
- report_fatal_error("Extracting a fixed-length vector from an illegal "
- "scalable vector is not yet supported");
-
GetSplitVector(N->getOperand(0), Lo, Hi);
- uint64_t LoElts = Lo.getValueType().getVectorMinNumElements();
+ uint64_t LoEltsMin = Lo.getValueType().getVectorMinNumElements();
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
- if (IdxVal < LoElts) {
- assert(IdxVal + SubVT.getVectorMinNumElements() <= LoElts &&
+ if (IdxVal < LoEltsMin) {
+ assert(IdxVal + SubVT.getVectorMinNumElements() <= LoEltsMin &&
"Extracted subvector crosses vector split!");
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
- } else {
+ } else if (SubVT.isScalableVector() ==
+ N->getOperand(0).getValueType().isScalableVector())
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi,
- DAG.getVectorIdxConstant(IdxVal - LoElts, dl));
- }
+ DAG.getVectorIdxConstant(IdxVal - LoEltsMin, dl));
+
+ // After this point the DAG node only permits extracting fixed-width
+ // subvectors from scalable vectors.
+ assert(SubVT.isFixedLengthVector() &&
+ "Extracting scalable subvector from fixed-width unsupported");
+
+ // If the element type is i1 and we're not promoting the result, then we may
+ // end up loading the wrong data since the bits are packed tightly into
+ // bytes. For example, if we extract a v4i1 (legal) from a nxv4i1 (legal)
+ // type at index 4, then we will load a byte starting at index 0.
+ if (SubVT.getScalarType() == MVT::i1)
+ report_fatal_error("Don't know how to extract fixed-width predicate "
+ "subvector from a scalable predicate vector");
+
+ // Spill the vector to the stack. We should use the alignment for
+ // the smallest part.
+ SDValue Vec = N->getOperand(0);
+ EVT VecVT = Vec.getValueType();
+ Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
+ SDValue StackPtr =
+ DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ SmallestAlign);
+
+ // Extract the subvector by loading the correct part.
+ StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVT, Idx);
+
+ return DAG.getLoad(
+ SubVT, dl, Store, StackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
}
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
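[Editorial note] For the case the old code rejected outright -- a fixed-length subvector extracted from a split scalable vector at an index past the low half -- the operand is now spilled to a stack slot and the subvector is loaded back from the right offset. i1 predicates are still rejected because they are bit-packed in memory, so a byte-addressed reload at element granularity would read the wrong bits. A standalone sketch of the spill-and-reload idea on plain arrays (illustration only, not part of the patch; the caller guarantees Idx + 4 <= Vec.size()):

#include <array>
#include <cstring>
#include <vector>

// Model: extract a 4-element subvector starting at Idx by "spilling" the whole
// vector to memory and reloading only the slice we need.
std::array<float, 4> extractSubvector(const std::vector<float> &Vec,
                                      unsigned Idx) {
  std::vector<float> Slot(Vec); // spill to a stack temporary
  std::array<float, 4> Sub;
  std::memcpy(Sub.data(), Slot.data() + Idx, sizeof(Sub)); // reload the slice
  return Sub;
}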
@@ -3029,8 +3172,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDValue Idx = N->getOperand(1);
EVT VecVT = Vec.getValueType();
- if (isa<ConstantSDNode>(Idx)) {
- uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (const ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Idx)) {
+ uint64_t IdxVal = Index->getZExtValue();
SDValue Lo, Hi;
GetSplitVector(Vec, Lo, Hi);
@@ -3167,11 +3310,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo) {
MachinePointerInfo MPI;
if (LoMemVT.isScalableVector()) {
Alignment = commonAlignment(Alignment,
- LoMemVT.getSizeInBits().getKnownMinSize() / 8);
+ LoMemVT.getSizeInBits().getKnownMinValue() / 8);
MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
} else
MPI = N->getPointerInfo().getWithOffset(
- LoMemVT.getStoreSize().getFixedSize());
+ LoMemVT.getStoreSize().getFixedValue());
MMO = DAG.getMachineFunction().getMachineMemOperand(
MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment,
@@ -3186,6 +3329,80 @@ SDValue DAGTypeLegalizer::SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo) {
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
+SDValue DAGTypeLegalizer::SplitVecOp_VP_STRIDED_STORE(VPStridedStoreSDNode *N,
+ unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed vp_strided_store of a vector?");
+ assert(N->getOffset().isUndef() && "Unexpected VP strided store offset");
+
+ SDLoc DL(N);
+
+ SDValue Data = N->getValue();
+ SDValue LoData, HiData;
+ if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Data, LoData, HiData);
+ else
+ std::tie(LoData, HiData) = DAG.SplitVector(Data, DL);
+
+ EVT LoMemVT, HiMemVT;
+ bool HiIsEmpty = false;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetDependentSplitDestVTs(
+ N->getMemoryVT(), LoData.getValueType(), &HiIsEmpty);
+
+ SDValue Mask = N->getMask();
+ SDValue LoMask, HiMask;
+ if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC)
+ SplitVecRes_SETCC(Mask.getNode(), LoMask, HiMask);
+ else if (getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, LoMask, HiMask);
+ else
+ std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
+
+ SDValue LoEVL, HiEVL;
+ std::tie(LoEVL, HiEVL) =
+ DAG.SplitEVL(N->getVectorLength(), Data.getValueType(), DL);
+
+ // Generate the low vp_strided_store
+ SDValue Lo = DAG.getStridedStoreVP(
+ N->getChain(), DL, LoData, N->getBasePtr(), N->getOffset(),
+ N->getStride(), LoMask, LoEVL, LoMemVT, N->getMemOperand(),
+ N->getAddressingMode(), N->isTruncatingStore(), N->isCompressingStore());
+
+ // If the high vp_strided_store has zero storage size, only the low
+ // vp_strided_store is needed.
+ if (HiIsEmpty)
+ return Lo;
+
+ // Generate the high vp_strided_store.
+  // To calculate the high base address, add one stride's worth of bytes to
+  // the low base address for each element already stored by the low part,
+  // that is: Ptr = Ptr + (LoEVL * Stride)
+ EVT PtrVT = N->getBasePtr().getValueType();
+ SDValue Increment =
+ DAG.getNode(ISD::MUL, DL, PtrVT, LoEVL,
+ DAG.getSExtOrTrunc(N->getStride(), DL, PtrVT));
+ SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, N->getBasePtr(), Increment);
+
+ Align Alignment = N->getOriginalAlign();
+ if (LoMemVT.isScalableVector())
+ Alignment = commonAlignment(Alignment,
+ LoMemVT.getSizeInBits().getKnownMinValue() / 8);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(N->getPointerInfo().getAddrSpace()),
+ MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment,
+ N->getAAInfo(), N->getRanges());
+
+ SDValue Hi = DAG.getStridedStoreVP(
+ N->getChain(), DL, HiData, Ptr, N->getOffset(), N->getStride(), HiMask,
+ HiEVL, HiMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(),
+ N->isCompressingStore());
+
+ // Build a factor node to remember that this store is independent of the
+ // other one.
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
unsigned OpNo) {
assert(N->isUnindexed() && "Indexed masked store of vector?");
@@ -3243,11 +3460,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
MachinePointerInfo MPI;
if (LoMemVT.isScalableVector()) {
Alignment = commonAlignment(
- Alignment, LoMemVT.getSizeInBits().getKnownMinSize() / 8);
+ Alignment, LoMemVT.getSizeInBits().getKnownMinValue() / 8);
MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
} else
MPI = N->getPointerInfo().getWithOffset(
- LoMemVT.getStoreSize().getFixedSize());
+ LoMemVT.getStoreSize().getFixedValue());
MMO = DAG.getMachineFunction().getMachineMemOperand(
MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment,
@@ -3593,7 +3810,26 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
// The result (and the first input) has a legal vector type, but the second
// input needs splitting.
- return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements());
+
+ SDLoc DL(N);
+
+ EVT LHSLoVT, LHSHiVT;
+ std::tie(LHSLoVT, LHSHiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ if (!isTypeLegal(LHSLoVT) || !isTypeLegal(LHSHiVT))
+ return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements());
+
+ SDValue LHSLo, LHSHi;
+ std::tie(LHSLo, LHSHi) =
+ DAG.SplitVector(N->getOperand(0), DL, LHSLoVT, LHSHiVT);
+
+ SDValue RHSLo, RHSHi;
+ std::tie(RHSLo, RHSHi) = DAG.SplitVector(N->getOperand(1), DL);
+
+ SDValue Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLoVT, LHSLo, RHSLo);
+ SDValue Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHiVT, LHSHi, RHSHi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0), Lo, Hi);
}
SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) {
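[Editorial note] Instead of unrolling FCOPYSIGN into per-element operations whenever the sign operand needs splitting, the patch splits both operands, applies FCOPYSIGN on each half, and concatenates the results, falling back to unrolling only when the half types are not legal. This is valid because copysign acts lane by lane, so it distributes over any split of the lanes. A standalone illustration of that property (not part of the patch):

#include <array>
#include <cmath>

// copysign over a whole vector equals copysign over its two halves followed by
// a concat, because the operation is purely element-wise.
std::array<double, 4> copySignSplit(std::array<double, 4> Mag,
                                    std::array<double, 4> Sign) {
  std::array<double, 4> R;
  for (int I = 0; I < 2; ++I) // low half  (FCOPYSIGN on LHSLo/RHSLo)
    R[I] = std::copysign(Mag[I], Sign[I]);
  for (int I = 2; I < 4; ++I) // high half (FCOPYSIGN on LHSHi/RHSHi)
    R[I] = std::copysign(Mag[I], Sign[I]);
  return R;                   // CONCAT_VECTORS(Lo, Hi)
}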
@@ -3683,6 +3919,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_LOAD:
Res = WidenVecRes_VP_LOAD(cast<VPLoadSDNode>(N));
break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+ Res = WidenVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N));
+ break;
case ISD::MLOAD:
Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N));
break;
@@ -3692,6 +3931,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_GATHER:
Res = WidenVecRes_VP_GATHER(cast<VPGatherSDNode>(N));
break;
+ case ISD::VECTOR_REVERSE:
+ Res = WidenVecRes_VECTOR_REVERSE(N);
+ break;
case ISD::ADD: case ISD::VP_ADD:
case ISD::AND: case ISD::VP_AND:
@@ -3704,14 +3946,14 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SHL: case ISD::VP_SHL:
case ISD::SRA: case ISD::VP_ASHR:
case ISD::SRL: case ISD::VP_LSHR:
- case ISD::FMINNUM:
- case ISD::FMAXNUM:
+ case ISD::FMINNUM: case ISD::VP_FMINNUM:
+ case ISD::FMAXNUM: case ISD::VP_FMAXNUM:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
- case ISD::SMIN:
- case ISD::SMAX:
- case ISD::UMIN:
- case ISD::UMAX:
+ case ISD::SMIN: case ISD::VP_SMIN:
+ case ISD::SMAX: case ISD::VP_SMAX:
+ case ISD::UMIN: case ISD::VP_UMIN:
+ case ISD::UMAX: case ISD::VP_UMAX:
case ISD::UADDSAT:
case ISD::SADDSAT:
case ISD::USUBSAT:
@@ -3738,6 +3980,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_FMUL:
case ISD::VP_FDIV:
case ISD::VP_FREM:
+ case ISD::VP_FCOPYSIGN:
Res = WidenVecRes_Binary(N);
break;
@@ -3748,7 +3991,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
// If the target has custom/legal support for the scalar FP intrinsic ops
// (they are probably not destined to become libcalls), then widen those
// like any other binary ops.
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::FADD:
case ISD::FMUL:
@@ -3809,17 +4052,17 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FP_ROUND:
case ISD::VP_FP_ROUND:
case ISD::FP_TO_SINT:
- case ISD::VP_FPTOSI:
+ case ISD::VP_FP_TO_SINT:
case ISD::FP_TO_UINT:
- case ISD::VP_FPTOUI:
+ case ISD::VP_FP_TO_UINT:
case ISD::SIGN_EXTEND:
case ISD::VP_SIGN_EXTEND:
case ISD::SINT_TO_FP:
- case ISD::VP_SITOFP:
+ case ISD::VP_SINT_TO_FP:
case ISD::VP_TRUNCATE:
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
- case ISD::VP_UITOFP:
+ case ISD::VP_UINT_TO_FP:
case ISD::ZERO_EXTEND:
case ISD::VP_ZERO_EXTEND:
Res = WidenVecRes_Convert(N);
@@ -3851,17 +4094,34 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
// If the target has custom/legal support for the scalar FP intrinsic ops
// (they are probably not destined to become libcalls), then widen those
// like any other unary ops.
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::ABS:
+ case ISD::VP_ABS:
case ISD::BITREVERSE:
+ case ISD::VP_BITREVERSE:
case ISD::BSWAP:
+ case ISD::VP_BSWAP:
case ISD::CTLZ:
+ case ISD::VP_CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::VP_CTLZ_ZERO_UNDEF:
case ISD::CTPOP:
+ case ISD::VP_CTPOP:
case ISD::CTTZ:
+ case ISD::VP_CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::VP_CTTZ_ZERO_UNDEF:
case ISD::FNEG: case ISD::VP_FNEG:
+ case ISD::VP_FABS:
+ case ISD::VP_SQRT:
+ case ISD::VP_FCEIL:
+ case ISD::VP_FFLOOR:
+ case ISD::VP_FRINT:
+ case ISD::VP_FNEARBYINT:
+ case ISD::VP_FROUND:
+ case ISD::VP_FROUNDEVEN:
+ case ISD::VP_FROUNDTOZERO:
case ISD::FREEZE:
case ISD::ARITH_FENCE:
case ISD::FCANONICALIZE:
@@ -3869,7 +4129,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
break;
case ISD::FMA: case ISD::VP_FMA:
case ISD::FSHL:
+ case ISD::VP_FSHL:
case ISD::FSHR:
+ case ISD::VP_FSHR:
Res = WidenVecRes_Ternary(N);
break;
}
@@ -4005,7 +4267,7 @@ static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI,
ConcatOps[j] = UndefVal;
}
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
- makeArrayRef(ConcatOps.data(), NumOps));
+ ArrayRef(ConcatOps.data(), NumOps));
}
SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
@@ -4480,8 +4742,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) {
+ SDValue FpValue = N->getOperand(0);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Arg = GetWidenedVector(N->getOperand(0));
+ if (getTypeAction(FpValue.getValueType()) != TargetLowering::TypeWidenVector)
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+ SDValue Arg = GetWidenedVector(FpValue);
return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, {Arg, N->getOperand(1)},
N->getFlags());
}
@@ -4585,33 +4850,43 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
unsigned WidenSize = WidenVT.getSizeInBits();
unsigned InSize = InVT.getSizeInBits();
+ unsigned InScalarSize = InVT.getScalarSizeInBits();
// x86mmx is not an acceptable vector element type, so don't try.
- if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) {
+ if (WidenSize % InScalarSize == 0 && InVT != MVT::x86mmx) {
// Determine new input vector type. The new input vector type will use
// the same element type (if its a vector) or use the input type as a
// vector. It is the same size as the type to widen to.
EVT NewInVT;
- unsigned NewNumElts = WidenSize / InSize;
+ unsigned NewNumParts = WidenSize / InSize;
if (InVT.isVector()) {
EVT InEltVT = InVT.getVectorElementType();
NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
WidenSize / InEltVT.getSizeInBits());
} else {
- NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumParts);
}
if (TLI.isTypeLegal(NewInVT)) {
SDValue NewVec;
if (InVT.isVector()) {
// Because the result and the input are different vector types, widening
- // the result could create a legal type but widening the input might make
- // it an illegal type that might lead to repeatedly splitting the input
- // and then widening it. To avoid this, we widen the input only if
+ // the result could create a legal type but widening the input might
+ // make it an illegal type that might lead to repeatedly splitting the
+ // input and then widening it. To avoid this, we widen the input only if
// it results in a legal type.
-      SmallVector<SDValue, 16> Ops(NewNumElts, DAG.getUNDEF(InVT));
-      Ops[0] = InOp;
-      NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
+      if (WidenSize % InSize == 0) {
+        SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getUNDEF(InVT));
+        Ops[0] = InOp;
+        NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
+ } else {
+ SmallVector<SDValue, 16> Ops;
+ DAG.ExtractVectorElements(InOp, Ops);
+ Ops.append(WidenSize / InScalarSize - Ops.size(),
+ DAG.getUNDEF(InVT.getVectorElementType()));
+
+ NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops);
+ }
} else {
NewVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewInVT, InOp);
}
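[Editorial note] The bitcast-widening path now only requires the widened size to be a multiple of the input's scalar size. When the whole input type no longer divides the widened size evenly, the input's elements are extracted individually, padded with undef, and rebuilt as a wider vector of the same element type before the bitcast. Assuming all types involved are legal, a bitcast of v3i32 to v6i16 whose result widens to v8i16 is one such case: 128 % 96 != 0 but 128 % 32 == 0, so the three i32 elements plus one undef become a v4i32, which bitcasts cleanly to v8i16. A byte-level standalone model of that padding (illustration only, not part of the patch):

#include <array>
#include <cstdint>
#include <cstring>

// Model: widen a 3 x i32 payload to 4 x i32 by padding, then reinterpret the
// padded storage as 8 x i16 -- the first six lanes carry the original bits.
std::array<uint16_t, 8> widenThenBitcast(const uint32_t (&In)[3]) {
  std::array<uint32_t, 4> Padded = {In[0], In[1], In[2], 0 /* undef lane */};
  std::array<uint16_t, 8> Out;
  std::memcpy(Out.data(), Padded.data(), sizeof(Padded));
  return Out;
}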
@@ -4768,7 +5043,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
// nxv2i64 extract_subvector(nxv16i64, 8)
// nxv2i64 extract_subvector(nxv16i64, 10)
// undef)
- unsigned GCD = greatestCommonDivisor(VTNumElts, WidenNumElts);
+ unsigned GCD = std::gcd(VTNumElts, WidenNumElts);
assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken "
"down type's element count");
EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
@@ -4915,6 +5190,33 @@ SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD(VPLoadSDNode *N) {
return Res;
}
+SDValue DAGTypeLegalizer::WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N) {
+ SDLoc DL(N);
+
+ // The mask should be widened as well
+ SDValue Mask = N->getMask();
+ assert(getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen VP strided load");
+ Mask = GetWidenedVector(Mask);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ assert(Mask.getValueType().getVectorElementCount() ==
+ WidenVT.getVectorElementCount() &&
+ "Data and mask vectors should have the same number of elements");
+
+ SDValue Res = DAG.getStridedLoadVP(
+ N->getAddressingMode(), N->getExtensionType(), WidenVT, DL, N->getChain(),
+ N->getBasePtr(), N->getOffset(), N->getStride(), Mask,
+ N->getVectorLength(), N->getMemoryVT(), N->getMemOperand(),
+ N->isExpandingLoad());
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
@@ -5316,6 +5618,61 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask);
}
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_REVERSE(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ SDLoc dl(N);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue OpValue = GetWidenedVector(N->getOperand(0));
+ assert(WidenVT == OpValue.getValueType() && "Unexpected widened vector type");
+
+ SDValue ReverseVal = DAG.getNode(ISD::VECTOR_REVERSE, dl, WidenVT, OpValue);
+ unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
+ unsigned VTNumElts = VT.getVectorMinNumElements();
+ unsigned IdxVal = WidenNumElts - VTNumElts;
+
+ if (VT.isScalableVector()) {
+ // Try to split the 'Widen ReverseVal' into smaller extracts and concat the
+ // results together, e.g.(nxv6i64 -> nxv8i64)
+ // nxv8i64 vector_reverse
+ // <->
+ // nxv8i64 concat(
+ // nxv2i64 extract_subvector(nxv8i64, 2)
+ // nxv2i64 extract_subvector(nxv8i64, 4)
+ // nxv2i64 extract_subvector(nxv8i64, 6)
+ // nxv2i64 undef)
+
+ unsigned GCD = std::gcd(VTNumElts, WidenNumElts);
+ EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ ElementCount::getScalable(GCD));
+ assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken "
+ "down type's element count");
+ SmallVector<SDValue> Parts;
+ unsigned i = 0;
+ for (; i < VTNumElts / GCD; ++i)
+ Parts.push_back(
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, ReverseVal,
+ DAG.getVectorIdxConstant(IdxVal + i * GCD, dl)));
+ for (; i < WidenNumElts / GCD; ++i)
+ Parts.push_back(DAG.getUNDEF(PartVT));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
+ }
+
+ // Use VECTOR_SHUFFLE to combine new vector from 'ReverseVal' for
+ // fixed-vectors.
+ SmallVector<int, 16> Mask;
+ for (unsigned i = 0; i != VTNumElts; ++i) {
+ Mask.push_back(IdxVal + i);
+ }
+ for (unsigned i = VTNumElts; i != WidenNumElts; ++i)
+ Mask.push_back(-1);
+
+ return DAG.getVectorShuffle(WidenVT, dl, ReverseVal, DAG.getUNDEF(WidenVT),
+ Mask);
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
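[Editorial note] WidenVecRes_VECTOR_REVERSE reverses the already-widened operand and then pulls the original elements back out: the real data sits in the first VTNumElts lanes, so after the reverse it sits in the last VTNumElts lanes, starting at IdxVal = WidenNumElts - VTNumElts. For scalable types that slice is rebuilt from GCD-sized extract_subvectors; for fixed vectors a single shuffle does it. A standalone model of the fixed-width path (illustration only, not part of the patch):

#include <algorithm>
#include <vector>

// Model: reverse a logical N-element vector that was widened to WidenN lanes
// (the extra lanes are padding). Reverse the widened storage, then keep the
// window that starts at WidenN - N; trailing lanes are don't-care (undef).
std::vector<int> reverseWidened(std::vector<int> Widened, unsigned N) {
  unsigned WidenN = Widened.size();
  std::reverse(Widened.begin(), Widened.end());
  std::vector<int> Out(Widened.begin() + (WidenN - N), // shuffle mask IdxVal..IdxVal+N-1
                       Widened.begin() + WidenN);
  Out.resize(WidenN); // pad back to the widened width with don't-care lanes
  return Out;
}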
@@ -5432,6 +5789,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
case ISD::VP_STORE: Res = WidenVecOp_VP_STORE(N, OpNo); break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ Res = WidenVecOp_VP_STRIDED_STORE(N, OpNo);
+ break;
case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break;
case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
@@ -5910,6 +6270,38 @@ SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) {
ST->isCompressingStore());
}
+SDValue DAGTypeLegalizer::WidenVecOp_VP_STRIDED_STORE(SDNode *N,
+ unsigned OpNo) {
+ assert((OpNo == 1 || OpNo == 4) &&
+ "Can widen only data or mask operand of vp_strided_store");
+ VPStridedStoreSDNode *SST = cast<VPStridedStoreSDNode>(N);
+ SDValue Mask = SST->getMask();
+ SDValue StVal = SST->getValue();
+ SDLoc DL(N);
+
+ if (OpNo == 1)
+ assert(getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen VP strided store");
+ else
+ assert(getTypeAction(StVal.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen VP strided store");
+
+ StVal = GetWidenedVector(StVal);
+ Mask = GetWidenedVector(Mask);
+
+ assert(StVal.getValueType().getVectorElementCount() ==
+ Mask.getValueType().getVectorElementCount() &&
+ "Data and mask vectors should have the same number of elements");
+
+ return DAG.getStridedStoreVP(
+ SST->getChain(), DL, StVal, SST->getBasePtr(), SST->getOffset(),
+ SST->getStride(), Mask, SST->getVectorLength(), SST->getMemoryVT(),
+ SST->getMemOperand(), SST->getAddressingMode(), SST->isTruncatingStore(),
+ SST->isCompressingStore());
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
assert((OpNo == 1 || OpNo == 3) &&
"Can widen only data or mask operand of mstore");
@@ -6127,7 +6519,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
unsigned WideElts = WideVT.getVectorMinNumElements();
if (WideVT.isScalableVector()) {
- unsigned GCD = greatestCommonDivisor(OrigElts, WideElts);
+ unsigned GCD = std::gcd(OrigElts, WideElts);
EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
ElementCount::getScalable(GCD));
SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem);
@@ -6164,7 +6556,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) {
unsigned WideElts = WideVT.getVectorMinNumElements();
if (WideVT.isScalableVector()) {
- unsigned GCD = greatestCommonDivisor(OrigElts, WideElts);
+ unsigned GCD = std::gcd(OrigElts, WideElts);
EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
ElementCount::getScalable(GCD));
SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem);
@@ -6223,12 +6615,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
// Align: If 0, don't allow use of a wider type
// WidenEx: If Align is not 0, the amount additional we can load/store from.
-static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI,
- unsigned Width, EVT WidenVT,
- unsigned Align = 0, unsigned WidenEx = 0) {
+static std::optional<EVT> findMemType(SelectionDAG &DAG,
+ const TargetLowering &TLI, unsigned Width,
+ EVT WidenVT, unsigned Align = 0,
+ unsigned WidenEx = 0) {
EVT WidenEltVT = WidenVT.getVectorElementType();
const bool Scalable = WidenVT.isScalableVector();
- unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinSize();
+ unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinValue();
unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
unsigned AlignInBits = Align*8;
@@ -6266,7 +6659,7 @@ static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI,
// Skip vector MVTs which don't match the scalable property of WidenVT.
if (Scalable != MemVT.isScalableVector())
continue;
- unsigned MemVTWidth = MemVT.getSizeInBits().getKnownMinSize();
+ unsigned MemVTWidth = MemVT.getSizeInBits().getKnownMinValue();
auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
if ((Action == TargetLowering::TypeLegal ||
Action == TargetLowering::TypePromoteInteger) &&
@@ -6283,7 +6676,7 @@ static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI,
// Using element-wise loads and stores for widening operations is not
// supported for scalable vectors
if (Scalable)
- return None;
+ return std::nullopt;
return RetVT;
}
@@ -6348,9 +6741,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
(!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlign().value();
// Find the vector type that can load from.
- Optional<EVT> FirstVT =
- findMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
- WidthDiff.getKnownMinSize());
+ std::optional<EVT> FirstVT =
+ findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, LdAlign,
+ WidthDiff.getKnownMinValue());
if (!FirstVT)
return SDValue();
@@ -6361,15 +6754,15 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
// Unless we're able to load in one instruction we must work out how to load
// the remainder.
if (!TypeSize::isKnownLE(LdWidth, FirstVTWidth)) {
- Optional<EVT> NewVT = FirstVT;
+ std::optional<EVT> NewVT = FirstVT;
TypeSize RemainingWidth = LdWidth;
TypeSize NewVTWidth = FirstVTWidth;
do {
RemainingWidth -= NewVTWidth;
if (TypeSize::isKnownLT(RemainingWidth, NewVTWidth)) {
// The current type we are using is too large. Find a better size.
- NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinSize(), WidenVT,
- LdAlign, WidthDiff.getKnownMinSize());
+ NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinValue(),
+ WidenVT, LdAlign, WidthDiff.getKnownMinValue());
if (!NewVT)
return SDValue();
NewVTWidth = NewVT->getSizeInBits();
@@ -6387,7 +6780,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
if (!FirstVT->isVector()) {
unsigned NumElts =
- WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize();
+ WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
@@ -6396,9 +6789,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
return LdOp;
// TODO: We don't currently have any tests that exercise this code path.
- assert(WidenWidth.getFixedSize() % FirstVTWidth.getFixedSize() == 0);
+ assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0);
unsigned NumConcat =
- WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize();
+ WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
SmallVector<SDValue, 16> ConcatOps(NumConcat);
SDValue UndefVal = DAG.getUNDEF(*FirstVT);
ConcatOps[0] = LdOp;
@@ -6461,9 +6854,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
TypeSize LdTySize = LdTy.getSizeInBits();
TypeSize NewLdTySize = NewLdTy.getSizeInBits();
assert(NewLdTySize.isScalable() == LdTySize.isScalable() &&
- NewLdTySize.isKnownMultipleOf(LdTySize.getKnownMinSize()));
+ NewLdTySize.isKnownMultipleOf(LdTySize.getKnownMinValue()));
unsigned NumOps =
- NewLdTySize.getKnownMinSize() / LdTySize.getKnownMinSize();
+ NewLdTySize.getKnownMinValue() / LdTySize.getKnownMinValue();
SmallVector<SDValue, 16> WidenOps(NumOps);
unsigned j = 0;
for (; j != End-Idx; ++j)
@@ -6481,11 +6874,11 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
if (WidenWidth == LdTy.getSizeInBits() * (End - Idx))
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
- makeArrayRef(&ConcatOps[Idx], End - Idx));
+ ArrayRef(&ConcatOps[Idx], End - Idx));
// We need to fill the rest with undefs to build the vector.
unsigned NumOps =
- WidenWidth.getKnownMinSize() / LdTy.getSizeInBits().getKnownMinSize();
+ WidenWidth.getKnownMinValue() / LdTy.getSizeInBits().getKnownMinValue();
SmallVector<SDValue, 16> WidenOps(NumOps);
SDValue UndefVal = DAG.getUNDEF(LdTy);
{
@@ -6584,8 +6977,8 @@ bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
while (StWidth.isNonZero()) {
// Find the largest vector type we can store with.
- Optional<EVT> NewVT =
- findMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT);
+ std::optional<EVT> NewVT =
+ findMemType(DAG, TLI, StWidth.getKnownMinValue(), ValVT);
if (!NewVT)
return false;
MemVTs.push_back({*NewVT, 0});
@@ -6620,11 +7013,11 @@ bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
} while (--Count);
} else {
// Cast the vector to the scalar type we can store.
- unsigned NumElts = ValWidth.getFixedSize() / NewVTWidth.getFixedSize();
+ unsigned NumElts = ValWidth.getFixedValue() / NewVTWidth.getFixedValue();
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
// Readjust index position based on new vector type.
- Idx = Idx * ValEltWidth / NewVTWidth.getFixedSize();
+ Idx = Idx * ValEltWidth / NewVTWidth.getFixedValue();
do {
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
DAG.getVectorIdxConstant(Idx++, dl));
@@ -6636,7 +7029,7 @@ bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr);
} while (--Count);
// Restore index back to be relative to the original widen element type.
- Idx = Idx * NewVTWidth.getFixedSize() / ValEltWidth;
+ Idx = Idx * NewVTWidth.getFixedValue() / ValEltWidth;
}
}
@@ -6685,7 +7078,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
unsigned InNumElts = InEC.getFixedValue();
unsigned WidenNumElts = WidenEC.getFixedValue();
- // Fall back to extract and build.
+ // Fall back to extract and build (+ mask, if padding with zeros).
SmallVector<SDValue, 16> Ops(WidenNumElts);
EVT EltVT = NVT.getVectorElementType();
unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
@@ -6694,9 +7087,21 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
DAG.getVectorIdxConstant(Idx, dl));
- SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
- DAG.getUNDEF(EltVT);
- for ( ; Idx < WidenNumElts; ++Idx)
- Ops[Idx] = FillVal;
- return DAG.getBuildVector(NVT, dl, Ops);
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+
+ SDValue Widened = DAG.getBuildVector(NVT, dl, Ops);
+ if (!FillWithZeroes)
+ return Widened;
+
+ assert(NVT.isInteger() &&
+ "We expect to never want to FillWithZeroes for non-integral types.");
+
+ SmallVector<SDValue, 16> MaskOps;
+ MaskOps.append(MinNumElts, DAG.getAllOnesConstant(dl, EltVT));
+ MaskOps.append(WidenNumElts - MinNumElts, DAG.getConstant(0, dl, EltVT));
+
+ return DAG.getNode(ISD::AND, dl, NVT, Widened,
+ DAG.getBuildVector(NVT, dl, MaskOps));
}
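[Editorial note] The fall-back path of ModifyToType used to place explicit zeros in the padding lanes of the BUILD_VECTOR; now it always pads with undef and, when zero filling is requested, ANDs the result with a constant mask that is all-ones over the original lanes and zero over the padding. That keeps the BUILD_VECTOR uniform and only makes sense for integer types, hence the new assert. A standalone model of the masking step (illustration only, not part of the patch):

#include <algorithm>
#include <cstdint>
#include <vector>

// Model: widen In to WidenN lanes, then force the padding lanes to zero by
// ANDing with an all-ones / all-zeros mask instead of building zeros directly.
std::vector<uint32_t> widenWithZeroFill(const std::vector<uint32_t> &In,
                                        unsigned WidenN) {
  std::vector<uint32_t> Widened(WidenN, 0xDEADBEEF); // padding = arbitrary "undef"
  std::copy(In.begin(), In.end(), Widened.begin());

  std::vector<uint32_t> Mask(WidenN, 0);
  std::fill(Mask.begin(), Mask.begin() + In.size(), ~0u); // keep original lanes

  for (unsigned I = 0; I < WidenN; ++I)
    Widened[I] &= Mask[I]; // the ISD::AND with the BUILD_VECTOR mask
  return Widened;
}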
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 3ac2a7bddc5a..2d93adea6b9b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -426,10 +426,11 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
NumRes = 1;
} else {
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
- assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ assert(!MCID.implicit_defs().empty() &&
+ "Physical reg def must be in implicit def list!");
NumRes = MCID.getNumDefs();
- for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
- if (Reg == *ImpDef)
+ for (MCPhysReg ImpDef : MCID.implicit_defs()) {
+ if (Reg == ImpDef)
break;
++NumRes;
}
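[Editorial note] Both copies of getPhysicalRegisterVT (here and in ScheduleDAGRRList.cpp below) compute which result of the machine node corresponds to a physical register: explicit defs come first, then one result per implicit def, so the answer is NumDefs plus the register's position in the implicit-def list. The range-based loop over implicit_defs() preserves exactly that counting while dropping the null-terminated pointer walk. A standalone model of the counting (illustration only, not part of the patch):

#include <cstdint>
#include <vector>

// Model: result index of a physical register that is defined implicitly.
// Explicit defs occupy results [0, NumDefs); implicit defs follow in order.
unsigned resultIndexFor(unsigned NumDefs,
                        const std::vector<uint16_t> &ImplicitDefs,
                        uint16_t Reg) {
  unsigned NumRes = NumDefs;
  for (uint16_t ImpDef : ImplicitDefs) {
    if (Reg == ImpDef)
      break;
    ++NumRes;
  }
  return NumRes;
}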
@@ -526,11 +527,8 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
if (!Node->isMachineOpcode())
continue;
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
- if (!MCID.ImplicitDefs)
- continue;
- for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
- CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
- }
+ for (MCPhysReg Reg : MCID.implicit_defs())
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
return !LRegs.empty();
}
@@ -777,8 +775,7 @@ void ScheduleDAGLinearize::Schedule() {
MachineBasicBlock*
ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
- InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos,
- DAG->getUseInstrRefDebugInfo());
+ InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos);
DenseMap<SDValue, Register> VRBaseMap;
LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; });
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 8a04ce7535a1..c252046ef10b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -24,7 +24,7 @@
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
@@ -302,6 +302,8 @@ private:
} // end anonymous namespace
+static constexpr unsigned RegSequenceCost = 1;
+
/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
/// but for untyped values (MVT::Untyped) it means inspecting the node's
@@ -321,7 +323,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
// Special handling for CopyFromReg of untyped values.
if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
- unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
RegClass = RC->getID();
Cost = 1;
@@ -333,13 +335,14 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
RegClass = RC->getID();
- Cost = 1;
+ Cost = RegSequenceCost;
return;
}
unsigned Idx = RegDefPos.GetIdx();
- const MCInstrDesc Desc = TII->get(Opcode);
+ const MCInstrDesc &Desc = TII->get(Opcode);
const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
+ assert(RC && "Not a valid register class");
RegClass = RC->getID();
// FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
// better way to determine it.
@@ -1089,7 +1092,7 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
RemovePred(SU, Pred);
AddPredQueued(NewSU, Pred);
}
- for (SDep D : NodeSuccs) {
+ for (SDep &D : NodeSuccs) {
SUnit *SuccDep = D.getSUnit();
D.setSUnit(SU);
RemovePred(SuccDep, D);
@@ -1100,7 +1103,7 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
!D.isCtrl() && NewSU->NumRegDefsLeft > 0)
--NewSU->NumRegDefsLeft;
}
- for (SDep D : ChainSuccs) {
+ for (SDep &D : ChainSuccs) {
SUnit *SuccDep = D.getSUnit();
D.setSUnit(SU);
RemovePred(SuccDep, D);
@@ -1204,11 +1207,11 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
D.setSUnit(NewSU);
AddPredQueued(SuccSU, D);
D.setSUnit(SU);
- DelDeps.push_back(std::make_pair(SuccSU, D));
+ DelDeps.emplace_back(SuccSU, D);
}
}
- for (auto &DelDep : DelDeps)
- RemovePred(DelDep.first, DelDep.second);
+ for (const auto &[DelSU, DelD] : DelDeps)
+ RemovePred(DelSU, DelD);
AvailableQueue->updateNode(SU);
AvailableQueue->addNode(NewSU);
@@ -1242,17 +1245,17 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
SDep D = Succ;
D.setSUnit(CopyToSU);
AddPredQueued(SuccSU, D);
- DelDeps.push_back(std::make_pair(SuccSU, Succ));
+ DelDeps.emplace_back(SuccSU, Succ);
}
else {
- // Avoid scheduling the def-side copy before other successors. Otherwise
+ // Avoid scheduling the def-side copy before other successors. Otherwise,
// we could introduce another physreg interference on the copy and
// continue inserting copies indefinitely.
AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
}
}
- for (auto &DelDep : DelDeps)
- RemovePred(DelDep.first, DelDep.second);
+ for (const auto &[DelSU, DelD] : DelDeps)
+ RemovePred(DelSU, DelD);
SDep FromDep(SU, SDep::Data, Reg);
FromDep.setLatency(SU->Latency);
@@ -1281,10 +1284,11 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
NumRes = 1;
} else {
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
- assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ assert(!MCID.implicit_defs().empty() &&
+ "Physical reg def must be in implicit def list!");
NumRes = MCID.getNumDefs();
- for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
- if (Reg == *ImpDef)
+ for (MCPhysReg ImpDef : MCID.implicit_defs()) {
+ if (Reg == ImpDef)
break;
++NumRes;
}
@@ -1381,8 +1385,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
InlineAsm::isClobberKind(Flags)) {
// Check for def of register or earlyclobber register.
for (; NumVals; --NumVals, ++i) {
- unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
- if (Register::isPhysicalRegister(Reg))
+ Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (Reg.isPhysical())
CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
}
} else
@@ -1419,7 +1423,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
}
if (const uint32_t *RegMask = getNodeRegMask(Node))
CheckForLiveRegDefMasked(SU, RegMask,
- makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
+ ArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
RegAdded, LRegs);
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
@@ -1429,16 +1433,14 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
// of %noreg. When the OptionalDef is set to a valid register, we need to
// handle it in the same way as an ImplicitDef.
for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
- if (MCID.OpInfo[i].isOptionalDef()) {
+ if (MCID.operands()[i].isOptionalDef()) {
const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
- unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
+ Register Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
}
}
- if (!MCID.ImplicitDefs)
- continue;
- for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
- CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
+ for (MCPhysReg Reg : MCID.implicit_defs())
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
}
return !LRegs.empty();
@@ -1484,16 +1486,15 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
else dbgs() << printReg(LRegs[0], TRI);
dbgs() << " SU #" << CurSU->NodeNum << '\n');
- std::pair<LRegsMapT::iterator, bool> LRegsPair =
- LRegsMap.insert(std::make_pair(CurSU, LRegs));
- if (LRegsPair.second) {
+ auto [LRegsIter, LRegsInserted] = LRegsMap.try_emplace(CurSU, LRegs);
+ if (LRegsInserted) {
CurSU->isPending = true; // This SU is not in AvailableQueue right now.
Interferences.push_back(CurSU);
}
else {
assert(CurSU->isPending && "Interferences are pending");
// Update the interference with current live regs.
- LRegsPair.first->second = LRegs;
+ LRegsIter->second = LRegs;
}
CurSU = AvailableQueue->pop();
}
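The insert(std::make_pair(...)) call above becomes try_emplace, with the returned iterator/bool pair unpacked by a structured binding. A standalone sketch of that pattern with std::map standing in for the scheduler's map type (keys and values are made up):

#include <cstdio>
#include <map>
#include <vector>

int main() {
  std::map<int, std::vector<unsigned>> LRegsMap;
  std::vector<unsigned> LRegs = {3, 4};

  // try_emplace inserts only if the key is absent and reports whether it did.
  auto [It, Inserted] = LRegsMap.try_emplace(42, LRegs);
  std::printf("inserted: %d, elems: %zu\n", Inserted, It->second.size());

  // On a second call the existing entry is kept; update it explicitly,
  // mirroring the "update the interference with current live regs" path.
  LRegs = {5};
  auto [It2, Inserted2] = LRegsMap.try_emplace(42, LRegs);
  if (!Inserted2)
    It2->second = LRegs;
  std::printf("inserted: %d, elems: %zu\n", Inserted2, It2->second.size());
  return 0;
}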
@@ -2302,6 +2303,16 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) {
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
continue;
}
+ if (POpc == TargetOpcode::REG_SEQUENCE) {
+ unsigned DstRCIdx =
+ cast<ConstantSDNode>(PN->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
+ unsigned RCId = RC->getID();
+ // REG_SEQUENCE is untyped, so getRepRegClassCostFor cannot be used
+ // here. Instead, use the same constant as in GetCostForDef.
+ RegPressure[RCId] += RegSequenceCost;
+ continue;
+ }
unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
for (unsigned i = 0; i != NumDefs; ++i) {
MVT VT = PN->getSimpleValueType(i);
@@ -2376,9 +2387,9 @@ static bool hasOnlyLiveInOpers(const SUnit *SU) {
const SUnit *PredSU = Pred.getSUnit();
if (PredSU->getNode() &&
PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
- unsigned Reg =
- cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
- if (Register::isVirtualRegister(Reg)) {
+ Register Reg =
+ cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
+ if (Reg.isVirtual()) {
RetVal = true;
continue;
}
@@ -2397,9 +2408,9 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) {
if (Succ.isCtrl()) continue;
const SUnit *SuccSU = Succ.getSUnit();
if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
- unsigned Reg =
- cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
- if (Register::isVirtualRegister(Reg)) {
+ Register Reg =
+ cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
+ if (Reg.isVirtual()) {
RetVal = true;
continue;
}
@@ -2854,10 +2865,10 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
ScheduleDAGRRList *scheduleDAG,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- const MCPhysReg *ImpDefs
- = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
+ ArrayRef<MCPhysReg> ImpDefs =
+ TII->get(SU->getNode()->getMachineOpcode()).implicit_defs();
const uint32_t *RegMask = getNodeRegMask(SU->getNode());
- if(!ImpDefs && !RegMask)
+ if (ImpDefs.empty() && !RegMask)
return false;
for (const SDep &Succ : SU->Succs) {
@@ -2871,14 +2882,14 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
return true;
- if (ImpDefs)
- for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
- // Return true if SU clobbers this physical register use and the
- // definition of the register reaches from DepSU. IsReachable queries
- // a topological forward sort of the DAG (following the successors).
- if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
- scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
- return true;
+ for (MCPhysReg ImpDef : ImpDefs) {
+ // Return true if SU clobbers this physical register use and the
+ // definition of the register reaches from DepSU. IsReachable queries
+ // a topological forward sort of the DAG (following the successors).
+ if (TRI->regsOverlap(ImpDef, SuccPred.getReg()) &&
+ scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
+ return true;
+ }
}
}
return false;
@@ -2891,16 +2902,16 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
const TargetRegisterInfo *TRI) {
SDNode *N = SuccSU->getNode();
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
- const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
- assert(ImpDefs && "Caller should check hasPhysRegDefs");
+ ArrayRef<MCPhysReg> ImpDefs = TII->get(N->getMachineOpcode()).implicit_defs();
+ assert(!ImpDefs.empty() && "Caller should check hasPhysRegDefs");
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (!SUNode->isMachineOpcode())
continue;
- const MCPhysReg *SUImpDefs =
- TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
+ ArrayRef<MCPhysReg> SUImpDefs =
+ TII->get(SUNode->getMachineOpcode()).implicit_defs();
const uint32_t *SURegMask = getNodeRegMask(SUNode);
- if (!SUImpDefs && !SURegMask)
+ if (SUImpDefs.empty() && !SURegMask)
continue;
for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
MVT VT = N->getSimpleValueType(i);
@@ -2908,13 +2919,10 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
continue;
if (!N->hasAnyUseOfValue(i))
continue;
- unsigned Reg = ImpDefs[i - NumDefs];
+ MCPhysReg Reg = ImpDefs[i - NumDefs];
if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
return true;
- if (!SUImpDefs)
- continue;
- for (;*SUImpDefs; ++SUImpDefs) {
- unsigned SUReg = *SUImpDefs;
+ for (MCPhysReg SUReg : SUImpDefs) {
if (TRI->regsOverlap(Reg, SUReg))
return true;
}
@@ -2968,8 +2976,7 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
// like other nodes from the perspective of scheduling heuristics.
if (SDNode *N = SU.getNode())
if (N->getOpcode() == ISD::CopyToReg &&
- Register::isVirtualRegister(
- cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual())
continue;
SDNode *PredFrameSetup = nullptr;
@@ -3015,8 +3022,7 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
// like other nodes from the perspective of scheduling heuristics.
if (SDNode *N = SU.getNode())
if (N->getOpcode() == ISD::CopyFromReg &&
- Register::isVirtualRegister(
- cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual())
continue;
// Perform checks on the successors of PredSU.
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 5166db033c62..2e1fd1e8a758 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -110,11 +110,15 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
const TargetRegisterInfo *TRI,
const TargetInstrInfo *TII,
+ const TargetLowering &TLI,
unsigned &PhysReg, int &Cost) {
if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
return;
unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TLI.checkForPhysRegDependency(Def, User, Op, TRI, TII, PhysReg, Cost))
+ return;
+
if (Register::isVirtualRegister(Reg))
return;
@@ -188,7 +192,7 @@ static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
"expected an unused glue value");
CloneNodeWithValues(N, DAG,
- makeArrayRef(N->value_begin(), N->getNumValues() - 1));
+ ArrayRef(N->value_begin(), N->getNumValues() - 1));
}
/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
@@ -460,7 +464,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
// Find all predecessors and successors of the group.
for (SDNode *N = SU.getNode(); N; N = N->getGluedNode()) {
if (N->isMachineOpcode() &&
- TII->get(N->getMachineOpcode()).getImplicitDefs()) {
+ !TII->get(N->getMachineOpcode()).implicit_defs().empty()) {
SU.hasPhysRegClobbers = true;
unsigned NumUsed = InstrEmitter::CountResults(N);
while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
@@ -485,7 +489,8 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
unsigned PhysReg = 0;
int Cost = 1;
// Determine if this is a physical register dependency.
- CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+ CheckForPhysRegDependency(OpN, N, i, TRI, TII, TLI, PhysReg, Cost);
assert((PhysReg == 0 || !isChain) &&
"Chain dependence via physreg data?");
// FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
@@ -843,8 +848,7 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap,
/// not necessarily refer to returned BB. The emitter may split blocks.
MachineBasicBlock *ScheduleDAGSDNodes::
EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
- InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos,
- DAG->getUseInstrRefDebugInfo());
+ InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos);
DenseMap<SDValue, Register> VRBaseMap;
DenseMap<SUnit*, Register> CopyVRBaseMap;
SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
@@ -890,6 +894,9 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
MI->setFlag(MachineInstr::MIFlag::NoMerge);
}
+ if (MDNode *MD = DAG->getPCSections(Node))
+ MI->setPCSections(MF, MD);
+
return MI;
};
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 195c0e6a836f..9a3609bc183b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -18,7 +18,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -27,6 +26,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -45,6 +45,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -92,6 +93,7 @@ void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeInserted(SDNode *) {}
void SelectionDAG::DAGNodeDeletedListener::anchor() {}
+void SelectionDAG::DAGNodeInsertedListener::anchor() {}
#define DEBUG_TYPE "selectiondag"
@@ -291,6 +293,43 @@ bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) {
return true;
}
+bool ISD::isVectorShrinkable(const SDNode *N, unsigned NewEltSize,
+ bool Signed) {
+ assert(N->getValueType(0).isVector() && "Expected a vector!");
+
+ unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
+ if (EltSize <= NewEltSize)
+ return false;
+
+ if (N->getOpcode() == ISD::ZERO_EXTEND) {
+ return (N->getOperand(0).getValueType().getScalarSizeInBits() <=
+ NewEltSize) &&
+ !Signed;
+ }
+ if (N->getOpcode() == ISD::SIGN_EXTEND) {
+ return (N->getOperand(0).getValueType().getScalarSizeInBits() <=
+ NewEltSize) &&
+ Signed;
+ }
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (const SDValue &Op : N->op_values()) {
+ if (Op.isUndef())
+ continue;
+ if (!isa<ConstantSDNode>(Op))
+ return false;
+
+ APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().trunc(EltSize);
+ if (Signed && C.trunc(NewEltSize).sext(EltSize) != C)
+ return false;
+ if (!Signed && C.trunc(NewEltSize).zext(EltSize) != C)
+ return false;
+ }
+
+ return true;
+}
+
bool ISD::allOperandsUndef(const SDNode *N) {
// Return false if the node has no operands.
// This is "logically inconsistent" with the definition of "all" but
@@ -300,6 +339,10 @@ bool ISD::allOperandsUndef(const SDNode *N) {
return all_of(N->op_values(), [](SDValue Op) { return Op.isUndef(); });
}
+bool ISD::isFreezeUndef(const SDNode *N) {
+ return N->getOpcode() == ISD::FREEZE && N->getOperand(0).isUndef();
+}
+
bool ISD::matchUnaryPredicate(SDValue Op,
std::function<bool(ConstantSDNode *)> Match,
bool AllowUndefs) {
@@ -450,10 +493,10 @@ bool ISD::isVPReduction(unsigned Opcode) {
}
/// The operand position of the vector mask.
-Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {
+std::optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {
switch (Opcode) {
default:
- return None;
+ return std::nullopt;
#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, ...) \
case ISD::VPSD: \
return MASKPOS;
@@ -462,10 +505,10 @@ Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {
}
/// The operand position of the explicit vector length parameter.
-Optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) {
+std::optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) {
switch (Opcode) {
default:
- return None;
+ return std::nullopt;
#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, EVLPOS) \
case ISD::VPSD: \
return EVLPOS;
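Several hunks in this file swap llvm::Optional/llvm::None for std::optional/std::nullopt, as in getVPMaskIdx above. A minimal sketch of the returned-optional pattern; the opcodes and operand positions below are made up, since the real mapping is macro-generated from LLVM's VP intrinsic definitions:

#include <cstdio>
#include <optional>

// Hypothetical opcodes used only for illustration.
enum Opcode { VP_ADD, VP_LOAD, NOT_A_VP_OP };

static std::optional<unsigned> getMaskIdx(Opcode Opc) {
  switch (Opc) {
  case VP_ADD:
    return 2; // mask operand position (illustrative)
  case VP_LOAD:
    return 3;
  default:
    return std::nullopt; // replaces llvm::None
  }
}

int main() {
  if (auto Idx = getMaskIdx(VP_ADD))
    std::printf("mask at operand %u\n", *Idx);
  if (!getMaskIdx(NOT_A_VP_OP))
    std::printf("not a VP opcode\n");
  return 0;
}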
@@ -618,7 +661,7 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID,
}
}
-static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
+static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned OpC,
SDVTList VTList, ArrayRef<SDValue> OpList) {
AddNodeIDOpcode(ID, OpC);
AddNodeIDValueTypes(ID, VTList);
@@ -1018,6 +1061,9 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
// If any of the SDDbgValue nodes refer to this SDNode, invalidate
// them and forget about that node.
DbgInfo->erase(N);
+
+ // Invalidate extra info.
+ SDEI.erase(N);
}
#ifndef NDEBUG
@@ -1230,18 +1276,18 @@ Align SelectionDAG::getEVTAlign(EVT VT) const {
// EntryNode could meaningfully have debug info if we can find it...
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
: TM(tm), OptLevel(OL),
- EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
+ EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other, MVT::Glue)),
Root(getEntryNode()) {
InsertNode(&EntryNode);
DbgInfo = new SDDbgInfo();
}
void SelectionDAG::init(MachineFunction &NewMF,
- OptimizationRemarkEmitter &NewORE,
- Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
- LegacyDivergenceAnalysis * Divergence,
- ProfileSummaryInfo *PSIin,
- BlockFrequencyInfo *BFIin) {
+ OptimizationRemarkEmitter &NewORE, Pass *PassPtr,
+ const TargetLibraryInfo *LibraryInfo,
+ LegacyDivergenceAnalysis *Divergence,
+ ProfileSummaryInfo *PSIin, BlockFrequencyInfo *BFIin,
+ FunctionVarLocs const *VarLocs) {
MF = &NewMF;
SDAGISelPass = PassPtr;
ORE = &NewORE;
@@ -1252,6 +1298,7 @@ void SelectionDAG::init(MachineFunction &NewMF,
DA = Divergence;
PSI = PSIin;
BFI = BFIin;
+ FnVarLocs = VarLocs;
}
SelectionDAG::~SelectionDAG() {
@@ -1326,7 +1373,7 @@ void SelectionDAG::clear() {
ExternalSymbols.clear();
TargetExternalSymbols.clear();
MCSymbols.clear();
- SDCallSiteDbgInfo.clear();
+ SDEI.clear();
std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
static_cast<CondCodeSDNode*>(nullptr));
std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
@@ -1341,7 +1388,8 @@ void SelectionDAG::clear() {
SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) {
return VT.bitsGT(Op.getValueType())
? getNode(ISD::FP_EXTEND, DL, VT, Op)
- : getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL));
+ : getNode(ISD::FP_ROUND, DL, VT, Op,
+ getIntPtrConstant(0, DL, /*isTarget=*/true));
}
std::pair<SDValue, SDValue>
@@ -1415,6 +1463,10 @@ SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
return getZeroExtendInReg(Op, DL, VT);
}
+SDValue SelectionDAG::getNegative(SDValue Val, const SDLoc &DL, EVT VT) {
+ return getNode(ISD::SUB, DL, VT, getConstant(0, DL, VT), Val);
+}
+
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
return getNode(ISD::XOR, DL, VT, Val, getAllOnesConstant(DL, VT));
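The new getNegative builds negation as SUB(0, x), and getNOT builds bitwise NOT as XOR(x, all-ones). A quick standalone check of those identities on two's-complement integers, purely illustrative:

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  for (int32_t X : {0, 1, -7, 123456}) {
    assert(0 - X == -X);      // getNegative: SUB(0, X)
    assert((X ^ -1) == ~X);   // getNOT: XOR(X, all-ones)
  }
  std::printf("identities hold\n");
  return 0;
}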
@@ -1431,6 +1483,20 @@ SDValue SelectionDAG::getVPLogicalNOT(const SDLoc &DL, SDValue Val,
return getNode(ISD::VP_XOR, DL, VT, Val, TrueValue, Mask, EVL);
}
+SDValue SelectionDAG::getVPPtrExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op,
+ SDValue Mask, SDValue EVL) {
+ return getVPZExtOrTrunc(DL, VT, Op, Mask, EVL);
+}
+
+SDValue SelectionDAG::getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op,
+ SDValue Mask, SDValue EVL) {
+ if (VT.bitsGT(Op.getValueType()))
+ return getNode(ISD::VP_ZERO_EXTEND, DL, VT, Op, Mask, EVL);
+ if (VT.bitsLT(Op.getValueType()))
+ return getNode(ISD::VP_TRUNCATE, DL, VT, Op, Mask, EVL);
+ return Op;
+}
+
SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT,
EVT OpVT) {
if (!V)
@@ -1544,7 +1610,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
"APInt size does not match type size!");
unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(EltVT), None);
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), std::nullopt);
ID.AddPointer(Elt);
ID.AddBoolean(isO);
void *IP = nullptr;
@@ -1561,11 +1627,8 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
}
SDValue Result(N, 0);
- if (VT.isScalableVector())
- Result = getSplatVector(VT, DL, Result);
- else if (VT.isVector())
- Result = getSplatBuildVector(VT, DL, Result);
-
+ if (VT.isVector())
+ Result = getSplat(VT, DL, Result);
return Result;
}
@@ -1602,7 +1665,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
// we don't have issues with SNANs.
unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(EltVT), None);
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), std::nullopt);
ID.AddPointer(&V);
void *IP = nullptr;
SDNode *N = nullptr;
@@ -1617,10 +1680,8 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
}
SDValue Result(N, 0);
- if (VT.isScalableVector())
- Result = getSplatVector(VT, DL, Result);
- else if (VT.isVector())
- Result = getSplatBuildVector(VT, DL, Result);
+ if (VT.isVector())
+ Result = getSplat(VT, DL, Result);
NewSDValueDbgMsg(Result, "Creating fp constant: ", this);
return Result;
}
@@ -1661,7 +1722,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL,
Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
ID.AddPointer(GV);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
@@ -1679,7 +1740,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL,
SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
ID.AddInteger(FI);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
@@ -1697,7 +1758,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
"Cannot set target flags on target-independent jump tables");
unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
ID.AddInteger(JTI);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
@@ -1721,7 +1782,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
: getDataLayout().getPrefTypeAlign(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
ID.AddInteger(Alignment->value());
ID.AddInteger(Offset);
ID.AddPointer(C);
@@ -1748,7 +1809,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
Alignment = getDataLayout().getPrefTypeAlign(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
ID.AddInteger(Alignment->value());
ID.AddInteger(Offset);
C->addSelectionDAGCSEId(ID);
@@ -1767,7 +1828,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
unsigned TargetFlags) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None);
+ AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), std::nullopt);
ID.AddInteger(Index);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
@@ -1783,7 +1844,7 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None);
+ AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), std::nullopt);
ID.AddPointer(MBB);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
@@ -1894,7 +1955,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
"Index out of range");
// Copy the mask so we can do any needed cleanup.
- SmallVector<int, 8> MaskVec(Mask.begin(), Mask.end());
+ SmallVector<int, 8> MaskVec(Mask);
// Canonicalize shuffle v, v -> v, undef
if (N1 == N2) {
@@ -2050,7 +2111,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
EVT VT = SV.getValueType(0);
- SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end());
+ SmallVector<int, 8> MaskVec(SV.getMask());
ShuffleVectorSDNode::commuteMask(MaskVec);
SDValue Op0 = SV.getOperand(0);
@@ -2060,7 +2121,7 @@ SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::Register, getVTList(VT), None);
+ AddNodeIDNode(ID, ISD::Register, getVTList(VT), std::nullopt);
ID.AddInteger(RegNo);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
@@ -2075,7 +2136,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None);
+ AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), std::nullopt);
ID.AddPointer(RegMask);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
@@ -2117,7 +2178,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
ID.AddPointer(BA);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
@@ -2133,7 +2194,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
SDValue SelectionDAG::getSrcValue(const Value *V) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None);
+ AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), std::nullopt);
ID.AddPointer(V);
void *IP = nullptr;
@@ -2148,7 +2209,7 @@ SDValue SelectionDAG::getSrcValue(const Value *V) {
SDValue SelectionDAG::getMDNode(const MDNode *MD) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), None);
+ AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), std::nullopt);
ID.AddPointer(MD);
void *IP = nullptr;
@@ -2287,7 +2348,7 @@ SDValue SelectionDAG::CreateStackTemporary(TypeSize Bytes, Align Alignment) {
StackID = TFI->getStackIDForScalableVectors();
// The stack id gives an indication of whether the object is scalable or
// not, so it's safe to pass in the minimum size here.
- int FrameIdx = MFI.CreateStackObject(Bytes.getKnownMinSize(), Alignment,
+ int FrameIdx = MFI.CreateStackObject(Bytes.getKnownMinValue(), Alignment,
false, nullptr, StackID);
return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
}
@@ -2305,8 +2366,9 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
assert(VT1Size.isScalable() == VT2Size.isScalable() &&
"Don't know how to choose the maximum size when creating a stack "
"temporary");
- TypeSize Bytes =
- VT1Size.getKnownMinSize() > VT2Size.getKnownMinSize() ? VT1Size : VT2Size;
+ TypeSize Bytes = VT1Size.getKnownMinValue() > VT2Size.getKnownMinValue()
+ ? VT1Size
+ : VT2Size;
Type *Ty1 = VT1.getTypeForEVT(*getContext());
Type *Ty2 = VT2.getTypeForEVT(*getContext());
@@ -2380,34 +2442,34 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
default: break;
case ISD::SETEQ: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT,
OpVT);
case ISD::SETNE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
R==APFloat::cmpLessThan, dl, VT,
OpVT);
case ISD::SETLT: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT,
OpVT);
case ISD::SETGT: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl,
VT, OpVT);
case ISD::SETLE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan ||
R==APFloat::cmpEqual, dl, VT,
OpVT);
case ISD::SETGE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
R==APFloat::cmpEqual, dl, VT, OpVT);
case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT,
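LLVM_FALLTHROUGH is replaced throughout with the standard C++17 [[fallthrough]] attribute. A minimal standalone example of the attribute marking a deliberate fallthrough so compilers do not warn about it; the cases and values are invented:

#include <cstdio>

// Classify a setcc-like predicate: 0 for the shared handling, 1 otherwise.
static int classify(int Pred) {
  switch (Pred) {
  case 0: // "unordered" variant: shares the ordered case's handling below
    std::printf("unordered form\n");
    [[fallthrough]]; // deliberate: reuse the next case's code
  case 1:
    return 0;
  default:
    return 1;
  }
}

int main() {
  std::printf("%d %d %d\n", classify(0), classify(1), classify(2));
  return 0;
}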
@@ -2459,48 +2521,6 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
return SDValue();
}
-/// See if the specified operand can be simplified with the knowledge that only
-/// the bits specified by DemandedBits are used.
-/// TODO: really we should be making this into the DAG equivalent of
-/// SimplifyMultipleUseDemandedBits and not generate any new nodes.
-SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) {
- EVT VT = V.getValueType();
-
- if (VT.isScalableVector())
- return SDValue();
-
- switch (V.getOpcode()) {
- default:
- return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, *this);
- case ISD::Constant: {
- const APInt &CVal = cast<ConstantSDNode>(V)->getAPIntValue();
- APInt NewVal = CVal & DemandedBits;
- if (NewVal != CVal)
- return getConstant(NewVal, SDLoc(V), V.getValueType());
- break;
- }
- case ISD::SRL:
- // Only look at single-use SRLs.
- if (!V.getNode()->hasOneUse())
- break;
- if (auto *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
- // See if we can recursively simplify the LHS.
- unsigned Amt = RHSC->getZExtValue();
-
- // Watch out for shift count overflow though.
- if (Amt >= DemandedBits.getBitWidth())
- break;
- APInt SrcDemandedBits = DemandedBits << Amt;
- if (SDValue SimplifyLHS = TLI->SimplifyMultipleUseDemandedBits(
- V.getOperand(0), SrcDemandedBits, *this))
- return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS,
- V.getOperand(1));
- }
- break;
- }
- return SDValue();
-}
-
/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
/// use this predicate to simplify operations downstream.
bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
@@ -2538,17 +2558,40 @@ bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask,
return Mask.isSubsetOf(computeKnownBits(V, Depth).One);
}
+APInt SelectionDAG::computeVectorKnownZeroElements(SDValue Op,
+ const APInt &DemandedElts,
+ unsigned Depth) const {
+ EVT VT = Op.getValueType();
+ assert(VT.isVector() && !VT.isScalableVector() && "Only for fixed vectors!");
+
+ unsigned NumElts = VT.getVectorNumElements();
+ assert(DemandedElts.getBitWidth() == NumElts && "Unexpected demanded mask.");
+
+ APInt KnownZeroElements = APInt::getNullValue(NumElts);
+ for (unsigned EltIdx = 0; EltIdx != NumElts; ++EltIdx) {
+ if (!DemandedElts[EltIdx])
+ continue; // Don't query elements that are not demanded.
+ APInt Mask = APInt::getOneBitSet(NumElts, EltIdx);
+ if (MaskedVectorIsZero(Op, Mask, Depth))
+ KnownZeroElements.setBit(EltIdx);
+ }
+ return KnownZeroElements;
+}
+
/// isSplatValue - Return true if the vector V has the same value
-/// across all DemandedElts. For scalable vectors it does not make
-/// sense to specify which elements are demanded or undefined, therefore
-/// they are simply ignored.
+/// across all DemandedElts. For scalable vectors, we don't know the
+/// number of lanes at compile time. Instead, we use a 1-bit APInt
+/// to represent a conservative value for all lanes; that is, the
+/// single bit is implicitly splatted across all lanes.
bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
APInt &UndefElts, unsigned Depth) const {
unsigned Opcode = V.getOpcode();
EVT VT = V.getValueType();
assert(VT.isVector() && "Vector type expected");
+ assert((!VT.isScalableVector() || DemandedElts.getBitWidth() == 1) &&
+ "scalable demanded bits are ignored");
- if (!VT.isScalableVector() && !DemandedElts)
+ if (!DemandedElts)
return false; // No demanded elts, better to assume we don't know anything.
if (Depth >= MaxRecursionDepth)
@@ -2585,7 +2628,8 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
default:
if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID)
- return TLI->isSplatValueForTargetNode(V, DemandedElts, UndefElts, Depth);
+ return TLI->isSplatValueForTargetNode(V, DemandedElts, UndefElts, *this,
+ Depth);
break;
}
@@ -2730,11 +2774,11 @@ bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) const {
assert(VT.isVector() && "Vector type expected");
APInt UndefElts;
- APInt DemandedElts;
-
- // For now we don't support this with scalable vectors.
- if (!VT.isScalableVector())
- DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts
+ = APInt::getAllOnes(VT.isScalableVector() ? 1 : VT.getVectorNumElements());
return isSplatValue(V, DemandedElts, UndefElts) &&
(AllowUndefs || !UndefElts);
}
@@ -2747,10 +2791,11 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
switch (Opcode) {
default: {
APInt UndefElts;
- APInt DemandedElts;
-
- if (!VT.isScalableVector())
- DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts
+ = APInt::getAllOnes(VT.isScalableVector() ? 1 : VT.getVectorNumElements());
if (isSplatValue(V, DemandedElts, UndefElts)) {
if (VT.isScalableVector()) {
@@ -2773,9 +2818,7 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
SplatIdx = 0;
return V;
case ISD::VECTOR_SHUFFLE: {
- if (VT.isScalableVector())
- return SDValue();
-
+ assert(!VT.isScalableVector());
// Check if this is a shuffle node doing a splat.
// TODO - remove this and rely purely on SelectionDAG::isSplatValue,
// getTargetVShiftNode currently struggles without the splat source.
@@ -2890,14 +2933,10 @@ const APInt *SelectionDAG::getValidMaximumShiftAmountConstant(
KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
- // TOOD: Until we have a plan for how to represent demanded elements for
- // scalable vectors, we can just bail out for now.
- if (Op.getValueType().isScalableVector()) {
- unsigned BitWidth = Op.getScalarValueSizeInBits();
- return KnownBits(BitWidth);
- }
-
- APInt DemandedElts = VT.isVector()
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts = VT.isFixedLengthVector()
? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return computeKnownBits(Op, DemandedElts, Depth);
@@ -2912,11 +2951,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
KnownBits Known(BitWidth); // Don't know anything.
- // TOOD: Until we have a plan for how to represent demanded elements for
- // scalable vectors, we can just bail out for now.
- if (Op.getValueType().isScalableVector())
- return Known;
-
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
// We know all of the bits for a constant!
return KnownBits::makeConstant(C->getAPIntValue());
@@ -2931,7 +2965,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
KnownBits Known2;
unsigned NumElts = DemandedElts.getBitWidth();
- assert((!Op.getValueType().isVector() ||
+ assert((!Op.getValueType().isFixedLengthVector() ||
NumElts == Op.getValueType().getVectorNumElements()) &&
"Unexpected vector size");
@@ -2943,7 +2977,17 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
case ISD::MERGE_VALUES:
return computeKnownBits(Op.getOperand(Op.getResNo()), DemandedElts,
Depth + 1);
+ case ISD::SPLAT_VECTOR: {
+ SDValue SrcOp = Op.getOperand(0);
+ assert(SrcOp.getValueSizeInBits() >= BitWidth &&
+ "Expected SPLAT_VECTOR implicit truncation");
+ // Implicitly truncate the bits to match the official semantics of
+ // SPLAT_VECTOR.
+ Known = computeKnownBits(SrcOp, Depth + 1).trunc(BitWidth);
+ break;
+ }
case ISD::BUILD_VECTOR:
+ assert(!Op.getValueType().isScalableVector());
// Collect the known bits that are shared by every demanded vector element.
Known.Zero.setAllBits(); Known.One.setAllBits();
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
@@ -2969,32 +3013,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
break;
case ISD::VECTOR_SHUFFLE: {
+ assert(!Op.getValueType().isScalableVector());
// Collect the known bits that are shared by every vector element referenced
// by the shuffle.
- APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
- Known.Zero.setAllBits(); Known.One.setAllBits();
+ APInt DemandedLHS, DemandedRHS;
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
- for (unsigned i = 0; i != NumElts; ++i) {
- if (!DemandedElts[i])
- continue;
-
- int M = SVN->getMaskElt(i);
- if (M < 0) {
- // For UNDEF elements, we don't know anything about the common state of
- // the shuffle result.
- Known.resetAll();
- DemandedLHS.clearAllBits();
- DemandedRHS.clearAllBits();
- break;
- }
+ if (!getShuffleDemandedElts(NumElts, SVN->getMask(), DemandedElts,
+ DemandedLHS, DemandedRHS))
+ break;
- if ((unsigned)M < NumElts)
- DemandedLHS.setBit((unsigned)M % NumElts);
- else
- DemandedRHS.setBit((unsigned)M % NumElts);
- }
// Known bits are the values that are shared by every demanded element.
+ Known.Zero.setAllBits(); Known.One.setAllBits();
if (!!DemandedLHS) {
SDValue LHS = Op.getOperand(0);
Known2 = computeKnownBits(LHS, DemandedLHS, Depth + 1);
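The open-coded mask walk removed above is replaced by a getShuffleDemandedElts helper. A standalone sketch of what such a split does, written to match the removed code's behavior (give up on an undef mask element, otherwise route each demanded output lane to the left or right source); this is an illustration only, not the helper's actual signature:

#include <cstdio>
#include <vector>

// Split demanded output lanes of a two-input shuffle into demanded lanes of
// each input. Mask[i] in [0, N) selects from LHS, [N, 2N) from RHS, -1 is
// undef. Returns false if a demanded lane has an undef mask element.
static bool splitDemanded(const std::vector<int> &Mask,
                          const std::vector<bool> &DemandedElts,
                          std::vector<bool> &DemandedLHS,
                          std::vector<bool> &DemandedRHS) {
  unsigned N = Mask.size();
  DemandedLHS.assign(N, false);
  DemandedRHS.assign(N, false);
  for (unsigned I = 0; I != N; ++I) {
    if (!DemandedElts[I])
      continue;
    int M = Mask[I];
    if (M < 0)
      return false;
    if ((unsigned)M < N)
      DemandedLHS[M] = true;
    else
      DemandedRHS[M - N] = true;
  }
  return true;
}

int main() {
  std::vector<bool> L, R;
  // 4-lane shuffle taking lanes {0,5,2,7}; all output lanes demanded.
  bool OK = splitDemanded({0, 5, 2, 7}, {true, true, true, true}, L, R);
  std::printf("%d LHS:%d%d%d%d RHS:%d%d%d%d\n", OK, (int)L[0], (int)L[1],
              (int)L[2], (int)L[3], (int)R[0], (int)R[1], (int)R[2], (int)R[3]);
  return 0;
}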
@@ -3011,6 +3041,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
case ISD::CONCAT_VECTORS: {
+ if (Op.getValueType().isScalableVector())
+ break;
// Split DemandedElts and test each of the demanded subvectors.
Known.Zero.setAllBits(); Known.One.setAllBits();
EVT SubVectorVT = Op.getOperand(0).getValueType();
@@ -3031,6 +3063,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
case ISD::INSERT_SUBVECTOR: {
+ if (Op.getValueType().isScalableVector())
+ break;
// Demand any elements from the subvector and the remainder from the src it's
// inserted into.
SDValue Src = Op.getOperand(0);
@@ -3058,7 +3092,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
// Offset the demanded elts by the subvector index.
SDValue Src = Op.getOperand(0);
// Bail until we can represent demanded elements for scalable vectors.
- if (Src.getValueType().isScalableVector())
+ if (Op.getValueType().isScalableVector() || Src.getValueType().isScalableVector())
break;
uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
@@ -3067,6 +3101,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
case ISD::SCALAR_TO_VECTOR: {
+ if (Op.getValueType().isScalableVector())
+ break;
// We know about scalar_to_vector as much as we know about it source,
// which becomes the first element of otherwise unknown vector.
if (DemandedElts != 1)
@@ -3080,6 +3116,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
case ISD::BITCAST: {
+ if (Op.getValueType().isScalableVector())
+ break;
+
SDValue N0 = Op.getOperand(0);
EVT SubVT = N0.getValueType();
unsigned SubBitWidth = SubVT.getScalarSizeInBits();
@@ -3335,13 +3374,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
// Collect lo/hi source values and concatenate.
- // TODO: Would a KnownBits::concatBits helper be useful?
unsigned LoBits = Op.getOperand(0).getScalarValueSizeInBits();
unsigned HiBits = Op.getOperand(1).getScalarValueSizeInBits();
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- Known = Known.anyext(LoBits + HiBits);
- Known.insertBits(Known2, LoBits);
+ Known = Known2.concat(Known);
// Collect shift amount.
Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
@@ -3372,7 +3409,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If we have a known 1, its position is our upper bound.
unsigned PossibleTZ = Known2.countMaxTrailingZeros();
- unsigned LowBits = Log2_32(PossibleTZ) + 1;
+ unsigned LowBits = llvm::bit_width(PossibleTZ);
Known.Zero.setBitsFrom(LowBits);
break;
}
@@ -3381,7 +3418,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If we have a known 1, its position is our upper bound.
unsigned PossibleLZ = Known2.countMaxLeadingZeros();
- unsigned LowBits = Log2_32(PossibleLZ) + 1;
+ unsigned LowBits = llvm::bit_width(PossibleLZ);
Known.Zero.setBitsFrom(LowBits);
break;
}
@@ -3389,7 +3426,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If we know some of the bits are zero, they can't be one.
unsigned PossibleOnes = Known2.countMaxPopulation();
- Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1);
+ Known.Zero.setBitsFrom(llvm::bit_width(PossibleOnes));
break;
}
case ISD::PARITY: {
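In the CTTZ/CTLZ/CTPOP cases above, Log2_32(X) + 1 becomes llvm::bit_width(X), LLVM's analogue of C++20 std::bit_width: the number of bits needed to represent X, i.e. floor(log2(X)) + 1 for X > 0 and 0 for X == 0. A standalone check of the equivalence (the zero case is special-cased here rather than relying on Log2_32's behavior for 0):

#include <bit>
#include <cstdio>

int main() {
  for (unsigned X : {0u, 1u, 2u, 7u, 8u, 31u, 32u}) {
    unsigned Old = (X == 0) ? 0 : 31 - std::countl_zero(X) + 1; // Log2_32 + 1
    unsigned New = std::bit_width(X);
    std::printf("X=%2u  old=%u  new=%u\n", X, Old, New);
  }
  return 0;
}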
@@ -3403,7 +3440,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (ISD::isNON_EXTLoad(LD) && Cst) {
// Determine any common known bits from the loaded constant pool value.
Type *CstTy = Cst->getType();
- if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits()) {
+ if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits() &&
+ !Op.getValueType().isScalableVector()) {
// If its a vector splat, then we can (quickly) reuse the scalar path.
// NOTE: We assume all elements match and none are UNDEF.
if (CstTy->isVectorTy()) {
@@ -3453,12 +3491,32 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
unsigned MemBits = VT.getScalarSizeInBits();
Known.Zero.setBitsFrom(MemBits);
} else if (const MDNode *Ranges = LD->getRanges()) {
- if (LD->getExtensionType() == ISD::NON_EXTLOAD)
- computeKnownBitsFromRangeMetadata(*Ranges, Known);
+ EVT VT = LD->getValueType(0);
+
+ // TODO: Handle extending loads
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+ if (VT.isVector()) {
+ // Handle truncation to the first demanded element.
+ // TODO: Figure out which demanded elements are covered
+ if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
+ break;
+
+ // Handle the case where a load has a vector type, but scalar memory
+ // with an attached range.
+ EVT MemVT = LD->getMemoryVT();
+ KnownBits KnownFull(MemVT.getSizeInBits());
+
+ computeKnownBitsFromRangeMetadata(*Ranges, KnownFull);
+ Known = KnownFull.trunc(BitWidth);
+ } else
+ computeKnownBitsFromRangeMetadata(*Ranges, Known);
+ }
}
break;
}
case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ if (Op.getValueType().isScalableVector())
+ break;
EVT InVT = Op.getOperand(0).getValueType();
APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
@@ -3471,6 +3529,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ if (Op.getValueType().isScalableVector())
+ break;
EVT InVT = Op.getOperand(0).getValueType();
APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
@@ -3487,6 +3547,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
case ISD::ANY_EXTEND_VECTOR_INREG: {
+ if (Op.getValueType().isScalableVector())
+ break;
EVT InVT = Op.getOperand(0).getValueType();
APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
@@ -3506,7 +3568,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
case ISD::AssertZext: {
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
- Known = computeKnownBits(Op.getOperand(0), Depth+1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known.Zero |= (~InMask);
Known.One &= (~Known.Zero);
break;
@@ -3538,7 +3600,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setBitsFrom(1);
break;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SUB:
case ISD::SUBC: {
assert(Op.getResNo() == 0 &&
@@ -3566,7 +3628,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setBitsFrom(1);
break;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::ADD:
case ISD::ADDC:
case ISD::ADDE: {
@@ -3652,6 +3714,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
case ISD::INSERT_VECTOR_ELT: {
+ if (Op.getValueType().isScalableVector())
+ break;
+
// If we know the element index, split the demand between the
// source vector and the inserted element, otherwise assume we need
// the original demanded vector elements and the value.
@@ -3781,7 +3846,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setBitsFrom(1);
break;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::ATOMIC_CMP_SWAP:
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
@@ -3814,10 +3879,15 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
default:
if (Opcode < ISD::BUILTIN_OP_END)
break;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_VOID:
+ // TODO: Probably okay to remove after audit; here to reduce change size
+ // in initial enablement patch for scalable vectors
+ if (Op.getValueType().isScalableVector())
+ break;
+
// Allow the target to implement this method for its nodes.
TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth);
break;
@@ -3914,11 +3984,10 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
- // TODO: Assume we don't know anything for now.
- if (VT.isScalableVector())
- return 1;
-
- APInt DemandedElts = VT.isVector()
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts = VT.isFixedLengthVector()
? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return ComputeNumSignBits(Op, DemandedElts, Depth);
@@ -3941,7 +4010,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
if (Depth >= MaxRecursionDepth)
return 1; // Limit search depth.
- if (!DemandedElts || VT.isScalableVector())
+ if (!DemandedElts)
return 1; // No demanded elts, better to assume we don't know anything.
unsigned Opcode = Op.getOpcode();
@@ -3956,7 +4025,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::MERGE_VALUES:
return ComputeNumSignBits(Op.getOperand(Op.getResNo()), DemandedElts,
Depth + 1);
+ case ISD::SPLAT_VECTOR: {
+ // Check if the sign bits of source go down as far as the truncated value.
+ unsigned NumSrcBits = Op.getOperand(0).getValueSizeInBits();
+ unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (NumSrcSignBits > (NumSrcBits - VTBits))
+ return NumSrcSignBits - (NumSrcBits - VTBits);
+ break;
+ }
case ISD::BUILD_VECTOR:
+ assert(!VT.isScalableVector());
Tmp = VTBits;
for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) {
if (!DemandedElts[i])
@@ -3979,22 +4057,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::VECTOR_SHUFFLE: {
// Collect the minimum number of sign bits that are shared by every vector
// element referenced by the shuffle.
- APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
+ APInt DemandedLHS, DemandedRHS;
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
- for (unsigned i = 0; i != NumElts; ++i) {
- int M = SVN->getMaskElt(i);
- if (!DemandedElts[i])
- continue;
- // For UNDEF elements, we don't know anything about the common state of
- // the shuffle result.
- if (M < 0)
- return 1;
- if ((unsigned)M < NumElts)
- DemandedLHS.setBit((unsigned)M % NumElts);
- else
- DemandedRHS.setBit((unsigned)M % NumElts);
- }
+ if (!getShuffleDemandedElts(NumElts, SVN->getMask(), DemandedElts,
+ DemandedLHS, DemandedRHS))
+ return 1;
+
Tmp = std::numeric_limits<unsigned>::max();
if (!!DemandedLHS)
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1);
@@ -4010,6 +4079,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
}
case ISD::BITCAST: {
+ if (VT.isScalableVector())
+ break;
SDValue N0 = Op.getOperand(0);
EVT SrcVT = N0.getValueType();
unsigned SrcBits = SrcVT.getScalarSizeInBits();
@@ -4067,6 +4138,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
Tmp2 = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
return std::max(Tmp, Tmp2);
case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ if (VT.isScalableVector())
+ break;
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements());
@@ -4284,6 +4357,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
break;
}
case ISD::EXTRACT_ELEMENT: {
+ if (VT.isScalableVector())
+ break;
const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1);
const int BitWidth = Op.getValueSizeInBits();
const int Items = Op.getOperand(0).getValueSizeInBits() / BitWidth;
@@ -4294,9 +4369,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// If the sign portion ends in our element the subtraction gives correct
// result. Otherwise it gives either negative or > bitwidth result
- return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
+ return std::clamp(KnownSign - rIndex * BitWidth, 0, BitWidth);
}
case ISD::INSERT_VECTOR_ELT: {
+ if (VT.isScalableVector())
+ break;
// If we know the element index, split the demand between the
// source vector and the inserted element, otherwise assume we need
// the original demanded vector elements and the value.
@@ -4327,6 +4404,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return Tmp;
}
case ISD::EXTRACT_VECTOR_ELT: {
+ assert(!VT.isScalableVector());
SDValue InVec = Op.getOperand(0);
SDValue EltNo = Op.getOperand(1);
EVT VecVT = InVec.getValueType();
@@ -4365,6 +4443,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
}
case ISD::CONCAT_VECTORS: {
+ if (VT.isScalableVector())
+ break;
// Determine the minimum number of sign bits across all demanded
// elts of the input vectors. Early out if the result is already 1.
Tmp = std::numeric_limits<unsigned>::max();
@@ -4383,6 +4463,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return Tmp;
}
case ISD::INSERT_SUBVECTOR: {
+ if (VT.isScalableVector())
+ break;
// Demand any elements from the subvector and the remainder from the src it's
// inserted into.
SDValue Src = Op.getOperand(0);
@@ -4406,6 +4488,34 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
return Tmp;
}
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ if (const MDNode *Ranges = LD->getRanges()) {
+ if (DemandedElts != 1)
+ break;
+
+ ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
+ if (VTBits > CR.getBitWidth()) {
+ switch (LD->getExtensionType()) {
+ case ISD::SEXTLOAD:
+ CR = CR.signExtend(VTBits);
+ break;
+ case ISD::ZEXTLOAD:
+ CR = CR.zeroExtend(VTBits);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (VTBits != CR.getBitWidth())
+ break;
+ return std::min(CR.getSignedMin().getNumSignBits(),
+ CR.getSignedMax().getNumSignBits());
+ }
+
+ break;
+ }
case ISD::ATOMIC_CMP_SWAP:
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
case ISD::ATOMIC_SWAP:
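The new ISD::LOAD case above derives a sign-bit count from a load's !range metadata by taking the minimum of the sign bits of the range's signed min and max. A standalone illustration on 32-bit values using C++20 <bit>; the range [-128, 127] is an assumed example, not taken from the patch:

#include <algorithm>
#include <bit>
#include <cstdint>
#include <cstdio>

// Number of leading bits equal to the sign bit, including the sign bit.
static unsigned numSignBits(int32_t V) {
  uint32_t U = static_cast<uint32_t>(V);
  return V < 0 ? std::countl_one(U) : std::countl_zero(U);
}

int main() {
  // A value known to lie in [-128, 127] has at least 25 sign bits in i32:
  // both endpoints do, and moving toward zero only adds sign bits.
  int32_t Lo = -128, Hi = 127;
  unsigned Bits = std::min(numSignBits(Lo), numSignBits(Hi));
  std::printf("%u %u -> %u\n", numSignBits(Lo), numSignBits(Hi), Bits);
  return 0;
}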
@@ -4453,7 +4563,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// We only need to handle vectors - computeKnownBits should handle
// scalar cases.
Type *CstTy = Cst->getType();
- if (CstTy->isVectorTy() &&
+ if (CstTy->isVectorTy() && !VT.isScalableVector() &&
(NumElts * VTBits) == CstTy->getPrimitiveSizeInBits() &&
VTBits == CstTy->getScalarSizeInBits()) {
Tmp = VTBits;
@@ -4488,10 +4598,14 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
Opcode == ISD::INTRINSIC_WO_CHAIN ||
Opcode == ISD::INTRINSIC_W_CHAIN ||
Opcode == ISD::INTRINSIC_VOID) {
- unsigned NumBits =
+ // TODO: This can probably be removed once target code is audited. This
+ // is here purely to reduce patch size and review complexity.
+ if (!VT.isScalableVector()) {
+ unsigned NumBits =
TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth);
- if (NumBits > 1)
- FirstAnswer = std::max(FirstAnswer, NumBits);
+ if (NumBits > 1)
+ FirstAnswer = std::max(FirstAnswer, NumBits);
+ }
}
// Finally, if we can prove that the top bits of the result are 0's or 1's,
@@ -4547,6 +4661,11 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
return true;
switch (Opcode) {
+ case ISD::VALUETYPE:
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ return true;
+
case ISD::UNDEF:
return PoisonOnly;
@@ -4562,9 +4681,9 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
}
return true;
- // TODO: Search for noundef attributes from library functions.
+ // TODO: Search for noundef attributes from library functions.
- // TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef.
+ // TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef.
default:
// Allow the target to implement this method for its nodes.
@@ -4575,7 +4694,94 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
break;
}
- return false;
+ // If Op can't create undef/poison and none of its operands are undef/poison
+ // then Op is never undef/poison.
+ // NOTE: TargetNodes should handle this themselves in
+ // isGuaranteedNotToBeUndefOrPoisonForTargetNode.
+ return !canCreateUndefOrPoison(Op, PoisonOnly, /*ConsiderFlags*/ true,
+ Depth) &&
+ all_of(Op->ops(), [&](SDValue V) {
+ return isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly, Depth + 1);
+ });
+}
+
+bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, bool PoisonOnly,
+ bool ConsiderFlags,
+ unsigned Depth) const {
+ // TODO: Assume we don't know anything for now.
+ EVT VT = Op.getValueType();
+ if (VT.isScalableVector())
+ return true;
+
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnes(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly, ConsiderFlags,
+ Depth);
+}
+
+bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
+ bool PoisonOnly, bool ConsiderFlags,
+ unsigned Depth) const {
+ // TODO: Assume we don't know anything for now.
+ EVT VT = Op.getValueType();
+ if (VT.isScalableVector())
+ return true;
+
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
+ case ISD::AssertSext:
+ case ISD::AssertZext:
+ case ISD::FREEZE:
+ case ISD::INSERT_SUBVECTOR:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::FSHL:
+ case ISD::FSHR:
+ case ISD::BSWAP:
+ case ISD::CTPOP:
+ case ISD::BITREVERSE:
+ case ISD::PARITY:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ case ISD::BITCAST:
+ case ISD::BUILD_VECTOR:
+ return false;
+
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ // Matches hasPoisonGeneratingFlags().
+ return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() ||
+ Op->getFlags().hasNoUnsignedWrap());
+
+ case ISD::SHL:
+ // If the max shift amount isn't in range, then the shift can create poison.
+ if (!getValidMaximumShiftAmountConstant(Op, DemandedElts))
+ return true;
+
+ // Matches hasPoisonGeneratingFlags().
+ return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() ||
+ Op->getFlags().hasNoUnsignedWrap());
+
+ default:
+ // Allow the target to implement this method for its nodes.
+ if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID)
+ return TLI->canCreateUndefOrPoisonForTargetNode(
+ Op, DemandedElts, *this, PoisonOnly, ConsiderFlags, Depth);
+ break;
+ }
+
+ // Be conservative and return true.
+ return true;
}
bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
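The two queries above are meant to compose: a node is known not to be undef or poison when it cannot itself introduce undef/poison and every operand is already known clean. A minimal sketch of a caller, where the helper name tryDropFreeze is purely illustrative and only the two SelectionDAG queries come from this patch:

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    // Sketch: freeze(Op) can be replaced by Op when Op is already guaranteed
    // not to be undef or poison (mirrors the ISD::FREEZE case in getNode()).
    static SDValue tryDropFreeze(SelectionDAG &DAG, SDValue Frozen) {
      if (Frozen.getOpcode() != ISD::FREEZE)
        return SDValue();
      SDValue Op = Frozen.getOperand(0);
      if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly=*/false,
                                               /*Depth=*/0))
        return Op;
      return SDValue();
    }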
@@ -4598,7 +4804,6 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
if (Depth >= MaxRecursionDepth)
return false; // Limit search depth.
- // TODO: Handle vectors.
// If the value is a constant, we can obviously see if it is a NaN or not.
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) {
return !C->getValueAPF().isNaN() ||
@@ -4613,7 +4818,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FDIV:
case ISD::FREM:
case ISD::FSIN:
- case ISD::FCOS: {
+ case ISD::FCOS:
+ case ISD::FMA:
+ case ISD::FMAD: {
if (SNaN)
return true;
// TODO: Need isKnownNeverInfinity
@@ -4650,14 +4857,6 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
return true;
- case ISD::FMA:
- case ISD::FMAD: {
- if (SNaN)
- return true;
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
- isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
- isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
- }
case ISD::FSQRT: // Need is known positive
case ISD::FLOG:
case ISD::FLOG2:
@@ -4696,6 +4895,12 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::EXTRACT_VECTOR_ELT: {
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
}
+ case ISD::BUILD_VECTOR: {
+ for (const SDValue &Opnd : Op->ops())
+ if (!isKnownNeverNaN(Opnd, SNaN, Depth + 1))
+ return false;
+ return true;
+ }
default:
if (Opcode >= ISD::BUILTIN_OP_END ||
Opcode == ISD::INTRINSIC_WO_CHAIN ||
@@ -4938,7 +5143,7 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
/// Gets or creates the specified node.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opcode, getVTList(VT), None);
+ AddNodeIDNode(ID, Opcode, getVTList(VT), std::nullopt);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
@@ -4980,7 +5185,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::TRUNCATE:
if (C->isOpaque())
break;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::ZERO_EXTEND:
return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
C->isTargetOpcode(), C->isOpaque());
@@ -5166,7 +5371,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
case ISD::FREEZE:
assert(VT == Operand.getValueType() && "Unexpected VT!");
- if (isGuaranteedNotToBeUndefOrPoison(Operand))
+ if (isGuaranteedNotToBeUndefOrPoison(Operand, /*PoisonOnly*/ false,
+ /*Depth*/ 1))
return Operand;
break;
case ISD::TokenFactor:
@@ -5428,8 +5634,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return V;
}
-static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
- const APInt &C2) {
+static std::optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
+ const APInt &C2) {
switch (Opcode) {
case ISD::ADD: return C1 + C2;
case ISD::SUB: return C1 - C2;
@@ -5505,7 +5711,23 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1);
}
}
- return llvm::None;
+ return std::nullopt;
+}
+
+// Handle constant folding with UNDEF.
+// TODO: Handle more cases.
+static std::optional<APInt> FoldValueWithUndef(unsigned Opcode, const APInt &C1,
+ bool IsUndef1, const APInt &C2,
+ bool IsUndef2) {
+ if (!(IsUndef1 || IsUndef2))
+ return FoldValue(Opcode, C1, C2);
+
+ // Fold and(x, undef) -> 0
+ // Fold mul(x, undef) -> 0
+ if (Opcode == ISD::AND || Opcode == ISD::MUL)
+ return APInt::getZero(C1.getBitWidth());
+
+ return std::nullopt;
}
SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
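FoldValueWithUndef above encodes the one situation where an undef operand still yields a known constant: for AND and MUL the undef lane may be chosen as zero, so the whole lane folds to zero; every other opcode gives up. A self-contained restatement of that rule on plain APInt values, independent of the DAG:

    #include "llvm/ADT/APInt.h"
    #include "llvm/CodeGen/ISDOpcodes.h"
    #include <optional>
    using namespace llvm;

    // Sketch: fold one lane whose other operand is undef. and(x, undef) and
    // mul(x, undef) may pick undef == 0 and fold to 0; anything else
    // (add, or, xor, ...) cannot be folded to a single constant here.
    static std::optional<APInt> foldLaneWithUndef(unsigned Opcode,
                                                  const APInt &Known) {
      if (Opcode == ISD::AND || Opcode == ISD::MUL)
        return APInt::getZero(Known.getBitWidth());
      return std::nullopt;
    }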
@@ -5581,7 +5803,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (C1->isOpaque() || C2->isOpaque())
return SDValue();
- Optional<APInt> FoldAttempt =
+ std::optional<APInt> FoldAttempt =
FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());
if (!FoldAttempt)
return SDValue();
@@ -5608,7 +5830,6 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
ElementCount NumElts = VT.getVectorElementCount();
// See if we can fold through bitcasted integer ops.
- // TODO: Can we handle undef elements?
if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() &&
Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&
Ops[0].getOpcode() == ISD::BITCAST &&
@@ -5624,11 +5845,11 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
SmallVector<APInt> RawBits1, RawBits2;
BitVector UndefElts1, UndefElts2;
if (BV1->getConstantRawBits(IsLE, EltBits, RawBits1, UndefElts1) &&
- BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2) &&
- UndefElts1.none() && UndefElts2.none()) {
+ BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2)) {
SmallVector<APInt> RawBits;
for (unsigned I = 0, E = NumElts.getFixedValue(); I != E; ++I) {
- Optional<APInt> Fold = FoldValue(Opcode, RawBits1[I], RawBits2[I]);
+ std::optional<APInt> Fold = FoldValueWithUndef(
+ Opcode, RawBits1[I], UndefElts1[I], RawBits2[I], UndefElts2[I]);
if (!Fold)
break;
RawBits.push_back(*Fold);
@@ -5823,7 +6044,7 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, /*AllowUndefs*/ true))
if (N1C && N1C->getValueAPF().isNegZero() && N2.isUndef())
return getUNDEF(VT);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::FADD:
case ISD::FMUL:
@@ -5882,11 +6103,11 @@ void SelectionDAG::canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1,
// Canonicalize:
// binop(const, nonconst) -> binop(nonconst, const)
- bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1);
- bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2);
- bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1);
- bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2);
- if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP))
+ SDNode *N1C = isConstantIntBuildVectorOrConstantInt(N1);
+ SDNode *N2C = isConstantIntBuildVectorOrConstantInt(N2);
+ SDNode *N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
+ SDNode *N2CFP = isConstantFPBuildVectorOrConstantFP(N2);
+ if ((N1C && !N2C) || (N1CFP && !N2CFP))
std::swap(N1, N2);
// Canonicalize:
@@ -5995,6 +6216,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getNode(ISD::AND, DL, VT, N1, getNOT(DL, N2, VT));
}
break;
+ case ISD::ABDS:
+ case ISD::ABDU:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
case ISD::SMIN:
case ISD::UMAX:
assert(VT.isInteger() && "This operator does not apply to FP types!");
@@ -6034,12 +6261,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
const APInt &ShiftImm = N2C->getAPIntValue();
return getVScale(DL, VT, MulImm << ShiftImm);
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SRA:
case ISD::SRL:
if (SDValue V = simplifyShift(N1, N2))
return V;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::ROTL:
case ISD::ROTR:
assert(VT == N1.getValueType() &&
@@ -6329,7 +6556,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// Handle undef ^ undef -> 0 special case. This is a common
// idiom (misuse).
return getConstant(0, DL, VT);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::ADD:
case ISD::SUB:
case ISD::UDIV:
@@ -6484,6 +6711,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"Dest and insert subvector source types must match!");
assert(VT.isVector() && N2VT.isVector() &&
"Insert subvector VTs must be vectors!");
+ assert(VT.getVectorElementType() == N2VT.getVectorElementType() &&
+ "Insert subvector VTs must have the same element type!");
assert((VT.isScalableVector() || N2VT.isFixedLengthVector()) &&
"Cannot insert a scalable vector into a fixed length vector!");
assert((VT.isScalableVector() != N2VT.isScalableVector() ||
@@ -6674,10 +6903,10 @@ SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, TypeSize Offset,
if (Offset.isScalable())
Index = getVScale(DL, Base.getValueType(),
- APInt(Base.getValueSizeInBits().getFixedSize(),
- Offset.getKnownMinSize()));
+ APInt(Base.getValueSizeInBits().getFixedValue(),
+ Offset.getKnownMinValue()));
else
- Index = getConstant(Offset.getFixedSize(), DL, VT);
+ Index = getConstant(Offset.getFixedValue(), DL, VT);
return getMemBasePlusOffset(Base, Index, DL, Flags);
}
@@ -6794,7 +7023,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
Align NewAlign = DL.getABITypeAlign(Ty);
// Don't promote to an alignment that would require dynamic stack
- // realignment.
+ // realignment which may conflict with optimizations such as tail call
+ // optimization.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->hasStackRealignment(MF))
while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
@@ -6986,6 +7216,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(C);
Align NewAlign = DL.getABITypeAlign(Ty);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment which may conflict with optimizations such as tail call
+ // optimization.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->hasStackRealignment(MF))
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign.previous();
+
if (NewAlign > Alignment) {
// Give the stack frame object a larger alignment if needed.
if (MFI.getObjectAlign(FI->getIndex()) < NewAlign)
@@ -7094,7 +7333,17 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- Align NewAlign = DAG.getDataLayout().getABITypeAlign(Ty);
+ const DataLayout &DL = DAG.getDataLayout();
+ Align NewAlign = DL.getABITypeAlign(Ty);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment which may conflict with optimizations such as tail call
+ // optimization.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->hasStackRealignment(MF))
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign.previous();
+
if (NewAlign > Alignment) {
// Give the stack frame object a larger alignment if needed.
if (MFI.getObjectAlign(FI->getIndex()) < NewAlign)
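With this change the same clamp appears in all three mem-op expansions (memcpy, memmove, memset): the ABI type alignment is only a candidate, and it is walked back down while it would force dynamic stack realignment. A small standalone sketch of the walk-down, assuming Align::previous() steps to the next smaller power of two (as its use above implies) and modelling the natural-stack-alignment test as a plain comparison:

    #include "llvm/Support/Alignment.h"
    using llvm::Align;

    // Sketch: lower a candidate alignment until it no longer exceeds the
    // natural stack alignment, mirroring the NewAlign.previous() loops above.
    static Align clampToNaturalStackAlign(Align Candidate, Align Current,
                                          Align NaturalStack) {
      while (Candidate > Current && Candidate > NaturalStack)
        Candidate = Candidate.previous(); // e.g. Align(32) -> Align(16)
      return Candidate;
    }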
@@ -7562,6 +7811,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
Opcode == ISD::ATOMIC_LOAD_FSUB ||
Opcode == ISD::ATOMIC_LOAD_FMAX ||
Opcode == ISD::ATOMIC_LOAD_FMIN ||
+ Opcode == ISD::ATOMIC_LOAD_UINC_WRAP ||
+ Opcode == ISD::ATOMIC_LOAD_UDEC_WRAP ||
Opcode == ISD::ATOMIC_SWAP ||
Opcode == ISD::ATOMIC_STORE) &&
"Invalid Atomic Op");
@@ -8816,12 +9067,12 @@ SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) {
if (auto *CondC = dyn_cast<ConstantSDNode>(Cond))
return CondC->isZero() ? F : T;
- // TODO: This should simplify VSELECT with constant condition using something
- // like this (but check boolean contents to be complete?):
- // if (ISD::isBuildVectorAllOnes(Cond.getNode()))
- // return T;
- // if (ISD::isBuildVectorAllZeros(Cond.getNode()))
- // return F;
+ // TODO: This should simplify VSELECT with non-zero constant condition using
+ // something like this (but check boolean contents to be complete?):
+ if (ConstantSDNode *CondC = isConstOrConstSplat(Cond, /*AllowUndefs*/ false,
+ /*AllowTruncation*/ true))
+ if (CondC->isZero())
+ return F;
// select ?, T, T --> T
if (T == F)
@@ -9177,7 +9428,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
SDVTList VTList) {
- return getNode(Opcode, DL, VTList, None);
+ return getNode(Opcode, DL, VTList, std::nullopt);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
@@ -9444,7 +9695,7 @@ void SelectionDAG::setNodeMemRefs(MachineSDNode *N,
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT) {
SDVTList VTs = getVTList(VT);
- return SelectNodeTo(N, MachineOpc, VTs, None);
+ return SelectNodeTo(N, MachineOpc, VTs, std::nullopt);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
@@ -9485,7 +9736,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT1, EVT VT2) {
SDVTList VTs = getVTList(VT1, VT2);
- return SelectNodeTo(N, MachineOpc, VTs, None);
+ return SelectNodeTo(N, MachineOpc, VTs, std::nullopt);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
@@ -9652,7 +9903,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT) {
SDVTList VTs = getVTList(VT);
- return getMachineNode(Opcode, dl, VTs, None);
+ return getMachineNode(Opcode, dl, VTs, std::nullopt);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
@@ -10091,6 +10342,8 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
// Preserve Debug Values
transferDbgValues(FromN, To);
+ // Preserve extra info.
+ copyExtraInfo(From, To.getNode());
// Iterate over all the existing uses of From. New uses will be added
// to the beginning of the use list, which we avoid visiting.
@@ -10152,6 +10405,8 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
assert((i < To->getNumValues()) && "Invalid To location");
transferDbgValues(SDValue(From, i), SDValue(To, i));
}
+ // Preserve extra info.
+ copyExtraInfo(From, To);
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
@@ -10194,9 +10449,12 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
if (From->getNumValues() == 1) // Handle the simple case efficiently.
return ReplaceAllUsesWith(SDValue(From, 0), To[0]);
- // Preserve Debug Info.
- for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+ for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) {
+ // Preserve Debug Info.
transferDbgValues(SDValue(From, i), To[i]);
+ // Preserve extra info.
+ copyExtraInfo(From, To[i].getNode());
+ }
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
@@ -10249,6 +10507,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
// Preserve Debug Info.
transferDbgValues(From, To);
+ copyExtraInfo(From.getNode(), To.getNode());
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
@@ -10402,6 +10661,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
return ReplaceAllUsesOfValueWith(*From, *To);
transferDbgValues(*From, *To);
+ copyExtraInfo(From->getNode(), To->getNode());
// Read up all the uses and make records of them. This helps
// processing new uses that are introduced during the
@@ -10636,6 +10896,67 @@ bool llvm::isMinSignedConstant(SDValue V) {
return Const != nullptr && Const->isMinSignedValue();
}
+bool llvm::isNeutralConstant(unsigned Opcode, SDNodeFlags Flags, SDValue V,
+ unsigned OperandNo) {
+ // NOTE: The cases should match with IR's ConstantExpr::getBinOpIdentity().
+ // TODO: Target-specific opcodes could be added.
+ if (auto *Const = isConstOrConstSplat(V)) {
+ switch (Opcode) {
+ case ISD::ADD:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::UMAX:
+ return Const->isZero();
+ case ISD::MUL:
+ return Const->isOne();
+ case ISD::AND:
+ case ISD::UMIN:
+ return Const->isAllOnes();
+ case ISD::SMAX:
+ return Const->isMinSignedValue();
+ case ISD::SMIN:
+ return Const->isMaxSignedValue();
+ case ISD::SUB:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ return OperandNo == 1 && Const->isZero();
+ case ISD::UDIV:
+ case ISD::SDIV:
+ return OperandNo == 1 && Const->isOne();
+ }
+ } else if (auto *ConstFP = isConstOrConstSplatFP(V)) {
+ switch (Opcode) {
+ case ISD::FADD:
+ return ConstFP->isZero() &&
+ (Flags.hasNoSignedZeros() || ConstFP->isNegative());
+ case ISD::FSUB:
+ return OperandNo == 1 && ConstFP->isZero() &&
+ (Flags.hasNoSignedZeros() || !ConstFP->isNegative());
+ case ISD::FMUL:
+ return ConstFP->isExactlyValue(1.0);
+ case ISD::FDIV:
+ return OperandNo == 1 && ConstFP->isExactlyValue(1.0);
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM: {
+ // Neutral element for fminnum is NaN, Inf or FLT_MAX, depending on FMF.
+ EVT VT = V.getValueType();
+ const fltSemantics &Semantics = SelectionDAG::EVTToAPFloatSemantics(VT);
+ APFloat NeutralAF = !Flags.hasNoNaNs()
+ ? APFloat::getQNaN(Semantics)
+ : !Flags.hasNoInfs()
+ ? APFloat::getInf(Semantics)
+ : APFloat::getLargest(Semantics);
+ if (Opcode == ISD::FMAXNUM)
+ NeutralAF.changeSign();
+
+ return ConstFP->isExactlyValue(NeutralAF);
+ }
+ }
+ }
+ return false;
+}
+
SDValue llvm::peekThroughBitcasts(SDValue V) {
while (V.getOpcode() == ISD::BITCAST)
V = V.getOperand(0);
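A sketch of how a combine could use the new helper to strip an identity operand; the fold itself is illustrative and not part of this patch, only llvm::isNeutralConstant() is:

    #include "llvm/CodeGen/SelectionDAGNodes.h"
    using namespace llvm;

    // Sketch: binop(X, Identity) -> X, e.g. add X, 0 / udiv X, 1 / and X, -1,
    // with OperandNo selecting which operand must be the identity.
    static SDValue stripNeutralRHS(SDValue N) {
      if (N.getNumOperands() != 2)
        return SDValue();
      if (isNeutralConstant(N.getOpcode(), N->getFlags(), N.getOperand(1),
                            /*OperandNo=*/1))
        return N.getOperand(0);
      return SDValue();
    }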
@@ -10666,6 +10987,16 @@ bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) {
ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs,
bool AllowTruncation) {
+ EVT VT = N.getValueType();
+ APInt DemandedElts = VT.isFixedLengthVector()
+ ? APInt::getAllOnes(VT.getVectorMinNumElements())
+ : APInt(1, 1);
+ return isConstOrConstSplat(N, DemandedElts, AllowUndefs, AllowTruncation);
+}
+
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
+ bool AllowUndefs,
+ bool AllowTruncation) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
return CN;
@@ -10683,34 +11014,11 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs,
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
BitVector UndefElements;
- ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
-
- // BuildVectors can truncate their operands. Ignore that case here unless
- // AllowTruncation is set.
- if (CN && (UndefElements.none() || AllowUndefs)) {
- EVT CVT = CN->getValueType(0);
- EVT NSVT = N.getValueType().getScalarType();
- assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension");
- if (AllowTruncation || (CVT == NSVT))
- return CN;
- }
- }
-
- return nullptr;
-}
-
-ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
- bool AllowUndefs,
- bool AllowTruncation) {
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
- return CN;
-
- if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
- BitVector UndefElements;
ConstantSDNode *CN = BV->getConstantSplatNode(DemandedElts, &UndefElements);
// BuildVectors can truncate their operands. Ignore that case here unless
// AllowTruncation is set.
+ // TODO: Look into whether we should allow UndefElements in non-DemandedElts
if (CN && (UndefElements.none() || AllowUndefs)) {
EVT CVT = CN->getValueType(0);
EVT NSVT = N.getValueType().getScalarType();
@@ -10724,21 +11032,11 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
}
ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) {
- if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
- return CN;
-
- if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
- BitVector UndefElements;
- ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
- if (CN && (UndefElements.none() || AllowUndefs))
- return CN;
- }
-
- if (N.getOpcode() == ISD::SPLAT_VECTOR)
- if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N.getOperand(0)))
- return CN;
-
- return nullptr;
+ EVT VT = N.getValueType();
+ APInt DemandedElts = VT.isFixedLengthVector()
+ ? APInt::getAllOnes(VT.getVectorMinNumElements())
+ : APInt(1, 1);
+ return isConstOrConstSplatFP(N, DemandedElts, AllowUndefs);
}
ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N,
@@ -10751,10 +11049,15 @@ ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N,
BitVector UndefElements;
ConstantFPSDNode *CN =
BV->getConstantFPSplatNode(DemandedElts, &UndefElements);
+ // TODO: Look into whether we should allow UndefElements in non-DemandedElts
if (CN && (UndefElements.none() || AllowUndefs))
return CN;
}
+ if (N.getOpcode() == ISD::SPLAT_VECTOR)
+ if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N.getOperand(0)))
+ return CN;
+
return nullptr;
}
@@ -10808,7 +11111,7 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
// the MMO. This is because the MMO might indicate only a possible address
// range instead of specifying the affected memory addresses precisely.
// TODO: Make MachineMemOperands aware of scalable vectors.
- assert(memvt.getStoreSize().getKnownMinSize() <= MMO->getSize() &&
+ assert(memvt.getStoreSize().getKnownMinValue() <= MMO->getSize() &&
"Size mismatch!");
}
@@ -11221,7 +11524,7 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
return false;
if (LD->getChain() != Base->getChain())
return false;
- EVT VT = LD->getValueType(0);
+ EVT VT = LD->getMemoryVT();
if (VT.getSizeInBits() / 8 != Bytes)
return false;
@@ -11234,8 +11537,8 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
return false;
}
-/// InferPtrAlignment - Infer alignment of a load / store address. Return None
-/// if it cannot be inferred.
+/// InferPtrAlign - Infer alignment of a load / store address. Return
+/// std::nullopt if it cannot be inferred.
MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const {
// If this is a GlobalAddress + cst, return the alignment.
const GlobalValue *GV = nullptr;
@@ -11267,7 +11570,7 @@ MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const {
return commonAlignment(MFI.getObjectAlign(FrameIdx), FrameOffset);
}
- return None;
+ return std::nullopt;
}
/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
@@ -11690,30 +11993,30 @@ bool BuildVectorSDNode::isConstant() const {
return true;
}
-Optional<std::pair<APInt, APInt>>
+std::optional<std::pair<APInt, APInt>>
BuildVectorSDNode::isConstantSequence() const {
unsigned NumOps = getNumOperands();
if (NumOps < 2)
- return None;
+ return std::nullopt;
if (!isa<ConstantSDNode>(getOperand(0)) ||
!isa<ConstantSDNode>(getOperand(1)))
- return None;
+ return std::nullopt;
unsigned EltSize = getValueType(0).getScalarSizeInBits();
APInt Start = getConstantOperandAPInt(0).trunc(EltSize);
APInt Stride = getConstantOperandAPInt(1).trunc(EltSize) - Start;
if (Stride.isZero())
- return None;
+ return std::nullopt;
for (unsigned i = 2; i < NumOps; ++i) {
if (!isa<ConstantSDNode>(getOperand(i)))
- return None;
+ return std::nullopt;
APInt Val = getConstantOperandAPInt(i).trunc(EltSize);
if (Val != (Start + (Stride * i)))
- return None;
+ return std::nullopt;
}
return std::make_pair(Start, Stride);
@@ -11847,6 +12150,18 @@ SDValue SelectionDAG::getNeutralElement(unsigned Opcode, const SDLoc &DL,
}
}
+void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) {
+ assert(From && To && "Invalid SDNode; empty source SDValue?");
+ auto I = SDEI.find(From);
+ if (I == SDEI.end())
+ return;
+
+ // Use of operator[] on the DenseMap may cause an insertion, which invalidates
+ // the iterator, hence the need to make a copy to prevent a use-after-free.
+ NodeExtraInfo Copy = I->second;
+ SDEI[To] = std::move(Copy);
+}
+
#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
SmallPtrSetImpl<const SDNode*> &Visited,
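The copy in copyExtraInfo() guards against a classic pitfall: operator[] on a DenseMap may insert, and an insertion may grow and rehash the map, invalidating iterators and references obtained earlier. A minimal standalone illustration of the safe pattern (key/value types chosen purely for exposition):

    #include "llvm/ADT/DenseMap.h"
    #include <string>
    #include <utility>

    // Sketch: copy the mapped value out before operator[], because the
    // insertion below may rehash M and invalidate the iterator I.
    static void copyEntry(llvm::DenseMap<int, std::string> &M, int From, int To) {
      auto I = M.find(From);
      if (I == M.end())
        return;
      std::string Copy = I->second;
      M[To] = std::move(Copy);
    }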
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index d236433f6fb4..a432d8e92bca 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -85,9 +85,9 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
}
bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
- const Optional<int64_t> NumBytes0,
+ const std::optional<int64_t> NumBytes0,
const SDNode *Op1,
- const Optional<int64_t> NumBytes1,
+ const std::optional<int64_t> NumBytes1,
const SelectionDAG &DAG, bool &IsAlias) {
BaseIndexOffset BasePtr0 = match(Op0, DAG);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 35650b9bd00e..0bdfdac6a65f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -15,8 +15,6 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -27,10 +25,12 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -63,6 +63,7 @@
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -99,6 +100,7 @@
#include <cstddef>
#include <iterator>
#include <limits>
+#include <optional>
#include <tuple>
using namespace llvm;
@@ -148,18 +150,18 @@ static const unsigned MaxParallelChains = 64;
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
- Optional<CallingConv::ID> CC);
+ std::optional<CallingConv::ID> CC);
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger than ValueVT then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
-static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
- const SDValue *Parts, unsigned NumParts,
- MVT PartVT, EVT ValueVT, const Value *V,
- Optional<CallingConv::ID> CC = None,
- Optional<ISD::NodeType> AssertOp = None) {
+static SDValue
+getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
+ unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V,
+ std::optional<CallingConv::ID> CC = std::nullopt,
+ std::optional<ISD::NodeType> AssertOp = std::nullopt) {
// Let the target assemble the parts if it wants to
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts,
@@ -180,8 +182,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
unsigned ValueBits = ValueVT.getSizeInBits();
// Assemble the power of 2 part.
- unsigned RoundParts =
- (NumParts & (NumParts - 1)) ? 1 << Log2_32(NumParts) : NumParts;
+ unsigned RoundParts = llvm::bit_floor(NumParts);
unsigned RoundBits = PartBits * RoundParts;
EVT RoundVT = RoundBits == ValueBits ?
ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
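llvm::bit_floor(NumParts) is the largest power of two not exceeding NumParts, which is exactly what the replaced expression computed by hand. A quick equivalence check, assuming NumParts is non-zero as the surrounding code guarantees:

    #include "llvm/ADT/bit.h"
    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    // Sketch: the old and new RoundParts computations agree for NumParts > 0,
    // e.g. 7 -> 4 and 8 -> 8 from both forms.
    static void checkRoundParts(unsigned NumParts) {
      unsigned Old =
          (NumParts & (NumParts - 1)) ? 1u << llvm::Log2_32(NumParts) : NumParts;
      assert(Old == llvm::bit_floor(NumParts) && "expressions must agree");
      (void)Old;
    }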
@@ -320,7 +321,7 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
- Optional<CallingConv::ID> CallConv) {
+ std::optional<CallingConv::ID> CallConv) {
assert(ValueVT.isVector() && "Not a vector value");
assert(NumParts > 0 && "No parts to assemble!");
const bool IsABIRegCopy = CallConv.has_value();
@@ -397,10 +398,9 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
- // If the element type of the source/dest vectors are the same, but the
- // parts vector has more elements than the value vector, then we have a
- // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
- // elements we want.
+ // If the parts vector has more elements than the value vector, then we
+ // have a vector widening case (e.g. <2 x float> -> <4 x float>).
+ // Extract the elements we want.
if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) {
assert((PartEVT.getVectorElementCount().getKnownMinValue() >
ValueVT.getVectorElementCount().getKnownMinValue()) &&
@@ -414,6 +414,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
DAG.getVectorIdxConstant(0, DL));
if (PartEVT == ValueVT)
return Val;
+ if (PartEVT.isInteger() && ValueVT.isFloatingPoint())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
}
// Promoted vector extract
@@ -447,12 +449,22 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// Handle cases such as i8 -> <1 x i1>
EVT ValueSVT = ValueVT.getVectorElementType();
if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) {
- if (ValueSVT.getSizeInBits() == PartEVT.getSizeInBits())
+ unsigned ValueSize = ValueSVT.getSizeInBits();
+ if (ValueSize == PartEVT.getSizeInBits()) {
Val = DAG.getNode(ISD::BITCAST, DL, ValueSVT, Val);
- else
+ } else if (ValueSVT.isFloatingPoint() && PartEVT.isInteger()) {
+ // It's possible a scalar floating point type gets softened to integer and
+ // then promoted to a larger integer. If PartEVT is the larger integer
+ // we need to truncate it and then bitcast to the FP type.
+ assert(ValueSVT.bitsLT(PartEVT) && "Unexpected types");
+ EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, IntermediateType, Val);
+ Val = DAG.getBitcast(ValueSVT, Val);
+ } else {
Val = ValueVT.isFloatingPoint()
? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
: DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
+ }
}
return DAG.getBuildVector(ValueVT, DL, Val);
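As a concrete instance of the new branch above: a <1 x half> result whose half element was softened to i16 and then promoted into an i32 register part is rebuilt by truncating the part back to i16 and bitcasting that to half before the final build_vector. A sketch of just that step, with names assumed to match the surrounding code:

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    // Sketch: PartVal is the promoted integer part (e.g. i32), ValueSVT the
    // scalar FP element type (e.g. f16) we need to recover.
    static SDValue recoverSoftenedElt(SelectionDAG &DAG, const SDLoc &DL,
                                      SDValue PartVal, EVT ValueSVT) {
      unsigned ValueSize = ValueSVT.getSizeInBits();
      EVT NarrowIntVT = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
      SDValue Narrow = DAG.getNode(ISD::TRUNCATE, DL, NarrowIntVT, PartVal);
      return DAG.getBitcast(ValueSVT, Narrow); // i16 -> f16
    }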
@@ -461,16 +473,16 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
MVT PartVT, const Value *V,
- Optional<CallingConv::ID> CallConv);
+ std::optional<CallingConv::ID> CallConv);
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
-static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
- SDValue *Parts, unsigned NumParts, MVT PartVT,
- const Value *V,
- Optional<CallingConv::ID> CallConv = None,
- ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+static void
+getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
+ unsigned NumParts, MVT PartVT, const Value *V,
+ std::optional<CallingConv::ID> CallConv = std::nullopt,
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
// Let the target split the parts if it wants to
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT,
@@ -555,7 +567,7 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
// The number of parts is not a power of 2. Split off and copy the tail.
assert(PartVT.isInteger() && ValueVT.isInteger() &&
"Do not know what to expand to!");
- unsigned RoundParts = 1 << Log2_32(NumParts);
+ unsigned RoundParts = llvm::bit_floor(NumParts);
unsigned RoundBits = RoundParts * PartBits;
unsigned OddParts = NumParts - RoundParts;
SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
@@ -643,7 +655,7 @@ static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
MVT PartVT, const Value *V,
- Optional<CallingConv::ID> CallConv) {
+ std::optional<CallingConv::ID> CallConv) {
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -678,7 +690,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
SDValue Widened = widenVectorToPartType(DAG, Val, DL, WidenVT);
Val = DAG.getAnyExtOrTrunc(Widened, DL, PartVT);
} else {
- if (ValueVT.getVectorElementCount().isScalar()) {
+ // Don't extract an integer from a float vector. This can happen if the
+ // FP type gets softened to integer and then promoted. The promotion
+ // prevents it from being picked up by the earlier bitcast case.
+ if (ValueVT.getVectorElementCount().isScalar() &&
+ (!ValueVT.isFloatingPoint() || !PartVT.isInteger())) {
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
DAG.getVectorIdxConstant(0, DL));
} else {
@@ -703,8 +719,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
unsigned NumRegs;
if (IsABIRegCopy) {
NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
- *DAG.getContext(), CallConv.value(), ValueVT, IntermediateVT,
- NumIntermediates, RegisterVT);
+ *DAG.getContext(), *CallConv, ValueVT, IntermediateVT, NumIntermediates,
+ RegisterVT);
} else {
NumRegs =
TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
@@ -718,7 +734,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() &&
"Mixing scalable and fixed vectors when copying in parts");
- Optional<ElementCount> DestEltCnt;
+ std::optional<ElementCount> DestEltCnt;
if (IntermediateVT.isVector())
DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates;
@@ -786,13 +802,13 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
}
RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
- EVT valuevt, Optional<CallingConv::ID> CC)
+ EVT valuevt, std::optional<CallingConv::ID> CC)
: ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
RegCount(1, regs.size()), CallConv(CC) {}
RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL, unsigned Reg, Type *Ty,
- Optional<CallingConv::ID> CC) {
+ std::optional<CallingConv::ID> CC) {
ComputeValueVTs(TLI, DL, Ty, ValueVTs);
CallConv = CC;
@@ -800,11 +816,11 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
for (EVT ValueVT : ValueVTs) {
unsigned NumRegs =
isABIMangled()
- ? TLI.getNumRegistersForCallingConv(Context, CC.value(), ValueVT)
+ ? TLI.getNumRegistersForCallingConv(Context, *CC, ValueVT)
: TLI.getNumRegisters(Context, ValueVT);
MVT RegisterVT =
isABIMangled()
- ? TLI.getRegisterTypeForCallingConv(Context, CC.value(), ValueVT)
+ ? TLI.getRegisterTypeForCallingConv(Context, *CC, ValueVT)
: TLI.getRegisterType(Context, ValueVT);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(Reg + i);
@@ -831,10 +847,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
// Copy the legal parts from the registers.
EVT ValueVT = ValueVTs[Value];
unsigned NumRegs = RegCount[Value];
- MVT RegisterVT =
- isABIMangled() ? TLI.getRegisterTypeForCallingConv(
- *DAG.getContext(), CallConv.value(), RegVTs[Value])
- : RegVTs[Value];
+ MVT RegisterVT = isABIMangled()
+ ? TLI.getRegisterTypeForCallingConv(
+ *DAG.getContext(), *CallConv, RegVTs[Value])
+ : RegVTs[Value];
Parts.resize(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
@@ -914,10 +930,10 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
unsigned NumParts = RegCount[Value];
- MVT RegisterVT =
- isABIMangled() ? TLI.getRegisterTypeForCallingConv(
- *DAG.getContext(), CallConv.value(), RegVTs[Value])
- : RegVTs[Value];
+ MVT RegisterVT = isABIMangled()
+ ? TLI.getRegisterTypeForCallingConv(
+ *DAG.getContext(), *CallConv, RegVTs[Value])
+ : RegVTs[Value];
if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
ExtendKind = ISD::ZERO_EXTEND;
@@ -1025,8 +1041,10 @@ RegsForValue::getRegsAndSizes() const {
}
void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
+ AssumptionCache *ac,
const TargetLibraryInfo *li) {
AA = aa;
+ AC = ac;
GFI = gfi;
LibInfo = li;
Context = DAG.getContext();
@@ -1117,18 +1135,57 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
HandlePHINodesInSuccessorBlocks(I.getParent());
}
+ // Add SDDbgValue nodes for any var locs here. Do so before updating
+ // SDNodeOrder, as this mapping is {Inst -> Locs BEFORE Inst}.
+ if (FunctionVarLocs const *FnVarLocs = DAG.getFunctionVarLocs()) {
+ for (auto It = FnVarLocs->locs_begin(&I), End = FnVarLocs->locs_end(&I);
+ It != End; ++It) {
+ auto *Var = FnVarLocs->getDILocalVariable(It->VariableID);
+ dropDanglingDebugInfo(Var, It->Expr);
+ if (!handleDebugValue(It->V, Var, It->Expr, It->DL, SDNodeOrder,
+ /*IsVariadic=*/false))
+ addDanglingDebugInfo(It, SDNodeOrder);
+ }
+ }
+
// Increase the SDNodeOrder if dealing with a non-debug instruction.
if (!isa<DbgInfoIntrinsic>(I))
++SDNodeOrder;
CurInst = &I;
+ // Set inserted listener only if required.
+ bool NodeInserted = false;
+ std::unique_ptr<SelectionDAG::DAGNodeInsertedListener> InsertedListener;
+ MDNode *PCSectionsMD = I.getMetadata(LLVMContext::MD_pcsections);
+ if (PCSectionsMD) {
+ InsertedListener = std::make_unique<SelectionDAG::DAGNodeInsertedListener>(
+ DAG, [&](SDNode *) { NodeInserted = true; });
+ }
+
visit(I.getOpcode(), I);
if (!I.isTerminator() && !HasTailCall &&
!isa<GCStatepointInst>(I)) // statepoints handle their exports internally
CopyToExportRegsIfNeeded(&I);
+ // Handle metadata.
+ if (PCSectionsMD) {
+ auto It = NodeMap.find(&I);
+ if (It != NodeMap.end()) {
+ DAG.addPCSections(It->second.getNode(), PCSectionsMD);
+ } else if (NodeInserted) {
+ // This should not happen; if it does, don't let it go unnoticed so we can
+ // fix it. Relevant visit*() function is probably missing a setValue().
+ errs() << "warning: loosing !pcsections metadata ["
+ << I.getModule()->getName() << "]\n";
+ LLVM_DEBUG(I.dump());
+ assert(false);
+ }
+ }
+
CurInst = nullptr;
}
@@ -1148,8 +1205,13 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
}
}
+void SelectionDAGBuilder::addDanglingDebugInfo(const VarLocInfo *VarLoc,
+ unsigned Order) {
+ DanglingDebugInfoMap[VarLoc->V].emplace_back(VarLoc, Order);
+}
+
void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI,
- DebugLoc DL, unsigned Order) {
+ unsigned Order) {
// We treat variadic dbg_values differently at this stage.
if (DI->hasArgList()) {
// For variadic dbg_values we will now insert an undef.
@@ -1161,7 +1223,7 @@ void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI,
}
SDDbgValue *SDV = DAG.getDbgValueList(
DI->getVariable(), DI->getExpression(), Locs, {},
- /*IsIndirect=*/false, DL, Order, /*IsVariadic=*/true);
+ /*IsIndirect=*/false, DI->getDebugLoc(), Order, /*IsVariadic=*/true);
DAG.AddDbgValue(SDV, /*isParameter=*/false);
} else {
// TODO: Dangling debug info will eventually either be resolved or produce
@@ -1171,18 +1233,18 @@ void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI,
assert(DI->getNumVariableLocationOps() == 1 &&
"DbgValueInst without an ArgList should have a single location "
"operand.");
- DanglingDebugInfoMap[DI->getValue(0)].emplace_back(DI, DL, Order);
+ DanglingDebugInfoMap[DI->getValue(0)].emplace_back(DI, Order);
}
}
void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
const DIExpression *Expr) {
auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) {
- const DbgValueInst *DI = DDI.getDI();
- DIVariable *DanglingVariable = DI->getVariable();
- DIExpression *DanglingExpr = DI->getExpression();
+ DIVariable *DanglingVariable = DDI.getVariable(DAG.getFunctionVarLocs());
+ DIExpression *DanglingExpr = DDI.getExpression();
if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) {
- LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << printDDI(DDI)
+ << "\n");
return true;
}
return false;
@@ -1211,15 +1273,12 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second;
for (auto &DDI : DDIV) {
- const DbgValueInst *DI = DDI.getDI();
- assert(!DI->hasArgList() && "Not implemented for variadic dbg_values");
- assert(DI && "Ill-formed DanglingDebugInfo");
- DebugLoc dl = DDI.getdl();
+ DebugLoc DL = DDI.getDebugLoc();
unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
- DILocalVariable *Variable = DI->getVariable();
- DIExpression *Expr = DI->getExpression();
- assert(Variable->isValidLocationForIntrinsic(dl) &&
+ DILocalVariable *Variable = DDI.getVariable(DAG.getFunctionVarLocs());
+ DIExpression *Expr = DDI.getExpression();
+ assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
SDDbgValue *SDV;
if (Val.getNode()) {
@@ -1229,10 +1288,10 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
// in the first place we should not be more successful here). Unless we
// have some test case that prove this to be correct we should avoid
// calling EmitFuncArgumentDbgValue here.
- if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl,
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expr, DL,
FuncArgumentDbgValueKind::Value, Val)) {
- LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
- << DbgSDNodeOrder << "] for:\n " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << "Resolve dangling debug info for " << printDDI(DDI)
+ << "\n");
LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump());
// Increase the SDNodeOrder for the DbgValue here to make sure it is
// inserted after the definition of Val when emitting the instructions
@@ -1241,17 +1300,17 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
<< "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
<< ValSDNodeOrder << "\n");
- SDV = getDbgValue(Val, Variable, Expr, dl,
+ SDV = getDbgValue(Val, Variable, Expr, DL,
std::max(DbgSDNodeOrder, ValSDNodeOrder));
DAG.AddDbgValue(SDV, false);
} else
- LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
- << "in EmitFuncArgumentDbgValue\n");
+ LLVM_DEBUG(dbgs() << "Resolved dangling debug info for "
+ << printDDI(DDI) << " in EmitFuncArgumentDbgValue\n");
} else {
- LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
- auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType());
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << printDDI(DDI) << "\n");
+ auto Undef = UndefValue::get(V->getType());
auto SDV =
- DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder);
+ DAG.getConstantDbgValue(Variable, Expr, Undef, DL, DbgSDNodeOrder);
DAG.AddDbgValue(SDV, false);
}
}
@@ -1263,21 +1322,19 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
// state of `handleDebugValue`, we need know specifically which values were
// invalid, so that we attempt to salvage only those values when processing
// a DIArgList.
- assert(!DDI.getDI()->hasArgList() &&
- "Not implemented for variadic dbg_values");
- Value *V = DDI.getDI()->getValue(0);
- DILocalVariable *Var = DDI.getDI()->getVariable();
- DIExpression *Expr = DDI.getDI()->getExpression();
- DebugLoc DL = DDI.getdl();
- DebugLoc InstDL = DDI.getDI()->getDebugLoc();
+ Value *V = DDI.getVariableLocationOp(0);
+ Value *OrigV = V;
+ DILocalVariable *Var = DDI.getVariable(DAG.getFunctionVarLocs());
+ DIExpression *Expr = DDI.getExpression();
+ DebugLoc DL = DDI.getDebugLoc();
unsigned SDOrder = DDI.getSDNodeOrder();
+
// Currently we consider only dbg.value intrinsics -- we tell the salvager
// that DW_OP_stack_value is desired.
- assert(isa<DbgValueInst>(DDI.getDI()));
bool StackValue = true;
// Can this Value can be encoded without any further work?
- if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder, /*IsVariadic=*/false))
+ if (handleDebugValue(V, Var, Expr, DL, SDOrder, /*IsVariadic=*/false))
return;
// Attempt to salvage back through as many instructions as possible. Bail if
@@ -1306,10 +1363,10 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
// Some kind of simplification occurred: check whether the operand of the
// salvaged debug expression can be encoded in this DAG.
- if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder,
- /*IsVariadic=*/false)) {
- LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n "
- << *DDI.getDI() << "\nBy stripping back to:\n " << *V);
+ if (handleDebugValue(V, Var, Expr, DL, SDOrder, /*IsVariadic=*/false)) {
+ LLVM_DEBUG(
+ dbgs() << "Salvaged debug location info for:\n " << *Var << "\n"
+ << *OrigV << "\nBy stripping back to:\n " << *V << "\n");
return;
}
}
@@ -1317,21 +1374,18 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
// This was the final opportunity to salvage this debug information, and it
// couldn't be done. Place an undef DBG_VALUE at this location to terminate
// any earlier variable location.
- auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType());
- auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
+ assert(OrigV && "V shouldn't be null");
+ auto *Undef = UndefValue::get(OrigV->getType());
+ auto *SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
DAG.AddDbgValue(SDV, false);
-
- LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << *DDI.getDI()
- << "\n");
- LLVM_DEBUG(dbgs() << " Last seen at:\n " << *DDI.getDI()->getOperand(0)
+ LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << printDDI(DDI)
<< "\n");
}
bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
DILocalVariable *Var,
- DIExpression *Expr, DebugLoc dl,
- DebugLoc InstDL, unsigned Order,
- bool IsVariadic) {
+ DIExpression *Expr, DebugLoc DbgLoc,
+ unsigned Order, bool IsVariadic) {
if (Values.empty())
return true;
SmallVector<SDDbgOperand> LocationOps;
@@ -1344,6 +1398,13 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
continue;
}
+ // Look through IntToPtr constants.
+ if (auto *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::IntToPtr) {
+ LocationOps.emplace_back(SDDbgOperand::fromConst(CE->getOperand(0)));
+ continue;
+ }
+
// If the Value is a frame index, we can create a FrameIndex debug value
// without relying on the DAG at all.
if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
@@ -1362,7 +1423,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
if (N.getNode()) {
// Only emit func arg dbg value for non-variadic dbg.values for now.
if (!IsVariadic &&
- EmitFuncArgumentDbgValue(V, Var, Expr, dl,
+ EmitFuncArgumentDbgValue(V, Var, Expr, DbgLoc,
FuncArgumentDbgValueKind::Value, N))
return true;
if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
@@ -1391,7 +1452,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
// they're parameters, and they are parameters of the current function. We
// need to let them dangle until they get an SDNode.
bool IsParamOfFunc =
- isa<Argument>(V) && Var->isParameter() && !InstDL.getInlinedAt();
+ isa<Argument>(V) && Var->isParameter() && !DbgLoc.getInlinedAt();
if (IsParamOfFunc)
return false;
@@ -1404,7 +1465,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
// If this is a PHI node, it may be split up into several MI PHI nodes
// (in FunctionLoweringInfo::set).
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
- V->getType(), None);
+ V->getType(), std::nullopt);
if (RFV.occupiesMultipleRegs()) {
// FIXME: We could potentially support variadic dbg_values here.
if (IsVariadic)
@@ -1429,7 +1490,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
if (!FragmentExpr)
continue;
SDDbgValue *SDV = DAG.getVRegDbgValue(
- Var, *FragmentExpr, RegAndSize.first, false, dl, SDNodeOrder);
+ Var, *FragmentExpr, RegAndSize.first, false, DbgLoc, SDNodeOrder);
DAG.AddDbgValue(SDV, false);
Offset += RegisterSize;
}
@@ -1446,9 +1507,9 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
// We have created a SDDbgOperand for each Value in Values.
// Should use Order instead of SDNodeOrder?
assert(!LocationOps.empty());
- SDDbgValue *SDV =
- DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies,
- /*IsIndirect=*/false, dl, SDNodeOrder, IsVariadic);
+ SDDbgValue *SDV = DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies,
+ /*IsIndirect=*/false, DbgLoc,
+ SDNodeOrder, IsVariadic);
DAG.AddDbgValue(SDV, /*isParameter=*/false);
return true;
}
@@ -1472,7 +1533,7 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
DAG.getDataLayout(), InReg, Ty,
- None); // This is not an ABI copy.
+ std::nullopt); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
V);
@@ -1647,12 +1708,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
else
Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
- if (isa<ScalableVectorType>(VecTy))
- return NodeMap[V] = DAG.getSplatVector(VT, getCurSDLoc(), Op);
-
- SmallVector<SDValue, 16> Ops;
- Ops.assign(cast<FixedVectorType>(VecTy)->getNumElements(), Op);
- return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
+ return NodeMap[V] = DAG.getSplat(VT, getCurSDLoc(), Op);
}
llvm_unreachable("Unknown vector constant");
@@ -1664,16 +1720,16 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end())
- return DAG.getFrameIndex(SI->second,
- TLI.getFrameIndexTy(DAG.getDataLayout()));
+ return DAG.getFrameIndex(
+ SI->second, TLI.getValueType(DAG.getDataLayout(), AI->getType()));
}
// If this is an instruction which fast-isel has deferred, select it now.
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
- unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
+ Register InReg = FuncInfo.InitializeRegForValue(Inst);
RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
- Inst->getType(), None);
+ Inst->getType(), std::nullopt);
SDValue Chain = DAG.getEntryNode();
return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}
@@ -2082,7 +2138,7 @@ void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
// Already exported?
if (FuncInfo.isExportedInst(V)) return;
- unsigned Reg = FuncInfo.InitializeRegForValue(V);
+ Register Reg = FuncInfo.InitializeRegForValue(V);
CopyValueToVirtualRegister(V, Reg);
}
@@ -2536,6 +2592,8 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
MVT::Other, getControlRoot(), Cond,
DAG.getBasicBlock(CB.TrueBB));
+ setValue(CurInst, BrCond);
+
// Insert the false branch. Do this even if it's a fall through branch,
// this makes it easier to do DAG optimizations which require inverting
// the branch condition.
@@ -2746,7 +2804,8 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
CallOptions.setDiscardResult(true);
SDValue Chain =
TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
- None, CallOptions, getCurSDLoc()).second;
+ std::nullopt, CallOptions, getCurSDLoc())
+ .second;
// On PS4/PS5, the "return address" must still be within the calling
// function, even if it's at the very end, so emit an explicit TRAP here.
// Passing 'true' for doesNotReturn above won't generate the trap for us.
@@ -2835,7 +2894,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
MVT VT = BB.RegVT;
SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
SDValue Cmp;
- unsigned PopCount = countPopulation(B.Mask);
+ unsigned PopCount = llvm::popcount(B.Mask);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (PopCount == 1) {
// Testing for a single bit; just compare the shift count with what it
@@ -3000,7 +3059,8 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
BasicBlock *Dest = I.getIndirectDest(i);
MachineBasicBlock *Target = FuncInfo.MBBMap[Dest];
Target->setIsInlineAsmBrIndirectTarget();
- Target->setHasAddressTaken();
+ Target->setMachineBlockAddressTaken();
+ Target->setLabelMustBeEmitted();
// Don't add duplicate machine successors.
if (Dests.insert(Dest).second)
addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
@@ -3279,7 +3339,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
Flags.copyFMF(*FPOp);
// Min/max matching is only viable if all output VTs are the same.
- if (is_splat(ValueVTs)) {
+ if (all_equal(ValueVTs)) {
EVT VT = ValueVTs[0];
LLVMContext &Ctx = *DAG.getContext();
auto &TLI = DAG.getTargetLoweringInfo();
@@ -3339,7 +3399,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
break;
case SPF_NABS:
Negate = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case SPF_ABS:
IsUnaryAbs = true;
Opc = ISD::ABS;
@@ -3375,8 +3435,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
Values[i] =
DAG.getNode(OpCode, dl, VT, LHSVal.getValue(LHSVal.getResNo() + i));
if (Negate)
- Values[i] = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT),
- Values[i]);
+ Values[i] = DAG.getNegative(Values[i], dl, VT);
}
} else {
for (unsigned i = 0; i != NumValues; ++i) {
@@ -3537,7 +3596,7 @@ void SelectionDAGBuilder::visitInsertElement(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
SDValue InVal = getValue(I.getOperand(1));
- SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
+ SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
TLI.getVectorIdxTy(DAG.getDataLayout()));
setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
TLI.getValueType(DAG.getDataLayout(), I.getType()),
@@ -3547,7 +3606,7 @@ void SelectionDAGBuilder::visitInsertElement(const User &I) {
void SelectionDAGBuilder::visitExtractElement(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
- SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
+ SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
TLI.getVectorIdxTy(DAG.getDataLayout()));
setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
TLI.getValueType(DAG.getDataLayout(), I.getType()),
@@ -3716,7 +3775,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
}
// Calculate new mask.
- SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
+ SmallVector<int, 8> MappedOps(Mask);
for (int &Idx : MappedOps) {
if (Idx >= (int)SrcNumElts)
Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
@@ -3856,10 +3915,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (IsVectorGEP && !N.getValueType().isVector()) {
LLVMContext &Context = *DAG.getContext();
EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount);
- if (VectorElementCount.isScalable())
- N = DAG.getSplatVector(VT, dl, N);
- else
- N = DAG.getSplatBuildVector(VT, dl, N);
+ N = DAG.getSplat(VT, dl, N);
}
for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
@@ -3891,7 +3947,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
DAG.getDataLayout().getTypeAllocSize(GTI.getIndexedType());
// We intentionally mask away the high bits here; ElementSize may not
// fit in IdxTy.
- APInt ElementMul(IdxSize, ElementSize.getKnownMinSize());
+ APInt ElementMul(IdxSize, ElementSize.getKnownMinValue());
bool ElementScalable = ElementSize.isScalable();
// If this is a scalar constant or a splat vector of constants,
@@ -3931,10 +3987,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (!IdxN.getValueType().isVector() && IsVectorGEP) {
EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(),
VectorElementCount);
- if (VectorElementCount.isScalable())
- IdxN = DAG.getSplatVector(VT, dl, IdxN);
- else
- IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
+ IdxN = DAG.getSplat(VT, dl, IdxN);
}
// If the index is smaller or larger than intptr_t, truncate or extend
@@ -4000,7 +4053,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
SDValue AllocSize = getValue(I.getArraySize());
- EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
+ EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), I.getAddressSpace());
if (AllocSize.getValueType() != IntPtr)
AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
@@ -4019,7 +4072,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
// the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
if (*Alignment <= StackAlign)
- Alignment = None;
+ Alignment = std::nullopt;
const uint64_t StackAlignMask = StackAlign.value() - 1U;
// Round the size of the allocation up to the stack alignment size
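The rounding referred to here is the usual power-of-two mask trick applied with StackAlignMask. A minimal standalone sketch (plain C++; the function name is illustrative, not LLVM API):

    #include <cstdint>

    // Round Size up to the next multiple of StackAlign (a power of two),
    // mirroring the StackAlignMask arithmetic used for DYNAMIC_STACKALLOC.
    uint64_t roundUpToStackAlign(uint64_t Size, uint64_t StackAlign) {
      const uint64_t Mask = StackAlign - 1; // e.g. 16 -> 0xF
      return (Size + Mask) & ~Mask;         // e.g. 17 -> 32 for align 16
    }
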
@@ -4068,11 +4121,6 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
SDValue Ptr = getValue(SV);
Type *Ty = I.getType();
- Align Alignment = I.getAlign();
-
- AAMDNodes AAInfo = I.getAAMetadata();
- const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
-
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
@@ -4080,9 +4128,12 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (NumValues == 0)
return;
+ Align Alignment = I.getAlign();
+ AAMDNodes AAInfo = I.getAAMetadata();
+ const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
bool isVolatile = I.isVolatile();
MachineMemOperand::Flags MMOFlags =
- TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
+ TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);
SDValue Root;
bool ConstantMemory = false;
@@ -4100,11 +4151,6 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Root = DAG.getEntryNode();
ConstantMemory = true;
MMOFlags |= MachineMemOperand::MOInvariant;
-
- // FIXME: pointsToConstantMemory probably does not imply dereferenceable,
- // but the previous usage implied it did. Probably should check
- // isDereferenceableAndAlignedPointer.
- MMOFlags |= MachineMemOperand::MODereferenceable;
} else {
// Do not serialize non-volatile loads against each other.
Root = DAG.getRoot();
@@ -4135,7 +4181,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (ChainI == MaxParallelChains) {
assert(PendingLoads.empty() && "PendingLoads must be serialized first");
SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- makeArrayRef(Chains.data(), ChainI));
+ ArrayRef(Chains.data(), ChainI));
Root = Chain;
ChainI = 0;
}
@@ -4157,7 +4203,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (!ConstantMemory) {
SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- makeArrayRef(Chains.data(), ChainI));
+ ArrayRef(Chains.data(), ChainI));
if (isVolatile)
DAG.setRoot(Chain);
else
@@ -4278,7 +4324,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
// See visitLoad comments.
if (ChainI == MaxParallelChains) {
SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- makeArrayRef(Chains.data(), ChainI));
+ ArrayRef(Chains.data(), ChainI));
Root = Chain;
ChainI = 0;
}
@@ -4294,7 +4340,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
}
SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- makeArrayRef(Chains.data(), ChainI));
+ ArrayRef(Chains.data(), ChainI));
+ setValue(&I, StoreNode);
DAG.setRoot(StoreNode);
}
@@ -4316,7 +4363,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
Src0 = I.getArgOperand(0);
Ptr = I.getArgOperand(1);
Mask = I.getArgOperand(2);
- Alignment = None;
+ Alignment = std::nullopt;
};
Value *PtrOperand, *MaskOperand, *Src0Operand;
@@ -4400,17 +4447,17 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy())
return false;
+ uint64_t ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType());
+
+ // Target may not support the required addressing mode.
+ if (ScaleVal != 1 &&
+ !TLI.isLegalScaleForGatherScatter(ScaleVal, ElemSize))
+ return false;
+
Base = SDB->getValue(BasePtr);
Index = SDB->getValue(IndexVal);
IndexType = ISD::SIGNED_SCALED;
- // MGATHER/MSCATTER are only required to support scaling by one or by the
- // element size. Other scales may be produced using target-specific DAG
- // combines.
- uint64_t ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType());
- if (ScaleVal != ElemSize && ScaleVal != 1)
- return false;
-
Scale =
DAG.getTargetConstant(ScaleVal, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
return true;
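A rough model of what getUniformBase decomposes here: each lane's gather/scatter address is Base + Index[lane] * Scale, where Scale is the allocation size of the GEP result element type. The old check only accepted a scale of 1 or the element size; the new code defers other scales to the isLegalScaleForGatherScatter target hook. The sketch below uses made-up names and is not LLVM API:

    #include <cstdint>

    // Hypothetical model of the decomposed addressing: for
    //   getelementptr i32, ptr %base, <4 x i64> %idx
    // Scale would be sizeof(i32) == 4.
    struct UniformGatherAddress {
      uint64_t Base;    // scalar base pointer value
      int64_t Index[4]; // per-lane indices
      uint64_t Scale;   // stride in bytes (GEP result element size)
    };

    uint64_t laneAddress(const UniformGatherAddress &A, unsigned Lane) {
      return A.Base + static_cast<uint64_t>(A.Index[Lane]) * A.Scale;
    }
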
@@ -4478,7 +4525,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
MaybeAlign &Alignment) {
// @llvm.masked.expandload.*(Ptr, Mask, Src0)
Ptr = I.getArgOperand(0);
- Alignment = None;
+ Alignment = std::nullopt;
Mask = I.getArgOperand(1);
Src0 = I.getArgOperand(2);
};
@@ -4624,6 +4671,12 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break;
case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break;
+ case AtomicRMWInst::UIncWrap:
+ NT = ISD::ATOMIC_LOAD_UINC_WRAP;
+ break;
+ case AtomicRMWInst::UDecWrap:
+ NT = ISD::ATOMIC_LOAD_UDEC_WRAP;
+ break;
}
AtomicOrdering Ordering = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
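For reference, a plain scalar model of the two atomicrmw operations mapped above to ATOMIC_LOAD_UINC_WRAP / ATOMIC_LOAD_UDEC_WRAP, following the LangRef description of uinc_wrap and udec_wrap; this is a non-atomic sketch for illustration only:

    #include <cstdint>

    // *ptr = (old >= val) ? 0 : old + 1   (unsigned compare)
    uint32_t uincWrap(uint32_t Old, uint32_t Val) {
      return (Old >= Val) ? 0 : Old + 1;
    }

    // *ptr = (old == 0 || old > val) ? val : old - 1   (unsigned compare)
    uint32_t udecWrap(uint32_t Old, uint32_t Val) {
      return (Old == 0 || Old > Val) ? Val : Old - 1;
    }
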
@@ -4659,7 +4712,9 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) {
TLI.getFenceOperandTy(DAG.getDataLayout()));
Ops[2] = DAG.getTargetConstant(I.getSyncScopeID(), dl,
TLI.getFenceOperandTy(DAG.getDataLayout()));
- DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
+ SDValue N = DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops);
+ setValue(&I, N);
+ DAG.setRoot(N);
}
void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
@@ -4677,7 +4732,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
I.getAlign().value() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
- auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
+ auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
@@ -4726,7 +4781,8 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
EVT MemVT =
TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
- if (I.getAlign().value() < MemVT.getSizeInBits() / 8)
+ if (!TLI.supportsUnalignedAtomics() &&
+ I.getAlign().value() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic store");
auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
@@ -4745,13 +4801,14 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
// TODO: Once this is better exercised by tests, it should be merged with
// the normal path for stores to prevent future divergence.
SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO);
+ setValue(&I, S);
DAG.setRoot(S);
return;
}
SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
Ptr, Val, MMO);
-
+ setValue(&I, OutChain);
DAG.setRoot(OutChain);
}
@@ -4826,13 +4883,21 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Create the node.
SDValue Result;
+ // In some cases, custom collection of operands from CallInst I may be needed.
+ TLI.CollectTargetIntrinsicOperands(I, Ops, DAG);
if (IsTgtIntrinsic) {
// This is target intrinsic that touches memory
- Result =
- DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
- MachinePointerInfo(Info.ptrVal, Info.offset),
- Info.align, Info.flags, Info.size,
- I.getAAMetadata());
+ //
+ // TODO: We currently just fall back to address space 0 if getTgtMemIntrinsic

+ // didn't yield anything useful.
+ MachinePointerInfo MPI;
+ if (Info.ptrVal)
+ MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
+ else if (Info.fallbackAddressSpace)
+ MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
+ Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops,
+ Info.memVT, MPI, Info.align, Info.flags,
+ Info.size, I.getAAMetadata());
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
@@ -5515,17 +5580,20 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
// For VRegs, in instruction referencing mode, create a DBG_INSTR_REF
// pointing at the VReg, which will be patched up later.
auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF);
- auto MIB = BuildMI(MF, DL, Inst);
- MIB.addReg(Reg);
- MIB.addImm(0);
- MIB.addMetadata(Variable);
+ SmallVector<MachineOperand, 1> MOs({MachineOperand::CreateReg(
+ /* Reg */ Reg, /* isDef */ false, /* isImp */ false,
+ /* isKill */ false, /* isDead */ false,
+ /* isUndef */ false, /* isEarlyClobber */ false,
+ /* SubReg */ 0, /* isDebug */ true)});
+
auto *NewDIExpr = FragExpr;
// We don't have an "Indirect" field in DBG_INSTR_REF, fold that into
// the DIExpression.
if (Indirect)
NewDIExpr = DIExpression::prepend(FragExpr, DIExpression::DerefBefore);
- MIB.addMetadata(NewDIExpr);
- return MIB;
+ SmallVector<uint64_t, 2> Ops({dwarf::DW_OP_LLVM_arg, 0});
+ NewDIExpr = DIExpression::prependOpcodes(NewDIExpr, Ops);
+ return BuildMI(MF, DL, Inst, false, MOs, Variable, NewDIExpr);
} else {
// Create a completely standard DBG_VALUE.
auto &Inst = TII->get(TargetOpcode::DBG_VALUE);
@@ -5599,7 +5667,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
bool IsIndirect = false;
- Optional<MachineOperand> Op;
+ std::optional<MachineOperand> Op;
// Some arguments' frame index is recorded during argument lowering.
int FI = FuncInfo.getArgumentFrameIndex(Arg);
if (FI != std::numeric_limits<int>::max())
@@ -5680,7 +5748,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (VMI != FuncInfo.ValueMap.end()) {
const auto &TLI = DAG.getTargetLoweringInfo();
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
- V->getType(), None);
+ V->getType(), std::nullopt);
if (RFV.occupiesMultipleRegs()) {
splitMultiRegDbgValue(RFV.getRegsAndSizes());
return true;
@@ -6026,6 +6094,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare: {
+ // Debug intrinsics are handled separately in assignment tracking mode.
+ if (isAssignmentTrackingEnabled(*I.getFunction()->getParent()))
+ return;
// Assume dbg.addr and dbg.declare can not currently use DIArgList, i.e.
// they are non-variadic.
const auto &DI = cast<DbgVariableIntrinsic>(I);
@@ -6125,7 +6196,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.AddDbgLabel(SDV);
return;
}
+ case Intrinsic::dbg_assign: {
+ // Debug intrinsics are handled separately in assignment tracking mode.
+ assert(isAssignmentTrackingEnabled(*I.getFunction()->getParent()) &&
+ "expected assignment tracking to be enabled");
+ return;
+ }
case Intrinsic::dbg_value: {
+ // Debug intrinsics are handled separately in assignment tracking mode.
+ if (isAssignmentTrackingEnabled(*I.getFunction()->getParent()))
+ return;
const DbgValueInst &DI = cast<DbgValueInst>(I);
assert(DI.getVariable() && "Missing variable");
@@ -6140,9 +6220,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
bool IsVariadic = DI.hasArgList();
- if (!handleDebugValue(Values, Variable, Expression, dl, DI.getDebugLoc(),
+ if (!handleDebugValue(Values, Variable, Expression, DI.getDebugLoc(),
SDNodeOrder, IsVariadic))
- addDanglingDebugInfo(&DI, dl, SDNodeOrder);
+ addDanglingDebugInfo(&DI, SDNodeOrder);
return;
}
@@ -6358,7 +6438,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// Get the last argument, the metadata and convert it to an integer in the
// call
Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(1))->getMetadata();
- Optional<RoundingMode> RoundMode =
+ std::optional<RoundingMode> RoundMode =
convertStrToRoundingMode(cast<MDString>(MD)->getString());
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
@@ -6750,8 +6830,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::gcread:
case Intrinsic::gcwrite:
llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
- case Intrinsic::flt_rounds:
- Res = DAG.getNode(ISD::FLT_ROUNDS_, sdl, {MVT::i32, MVT::Other}, getRoot());
+ case Intrinsic::get_rounding:
+ Res = DAG.getNode(ISD::GET_ROUNDING, sdl, {MVT::i32, MVT::Other}, getRoot());
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
@@ -6846,7 +6926,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Result = DAG.getMemIntrinsicNode(
ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops,
EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)),
- /* align */ None, Flags);
+ /* align */ std::nullopt, Flags);
// Chain the prefetch in parallel with any pending loads, to stay out of
// the way of later optimizations.
@@ -7178,6 +7258,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.getZExtOrTrunc(Const, sdl, PtrVT)));
return;
}
+ case Intrinsic::threadlocal_address: {
+ setValue(&I, getValue(I.getOperand(0)));
+ return;
+ }
case Intrinsic::get_active_lane_mask: {
EVT CCVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDValue Index = getValue(I.getOperand(0));
@@ -7191,14 +7275,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue TripCount = getValue(I.getOperand(1));
auto VecTy = CCVT.changeVectorElementType(ElementVT);
- SDValue VectorIndex, VectorTripCount;
- if (VecTy.isScalableVector()) {
- VectorIndex = DAG.getSplatVector(VecTy, sdl, Index);
- VectorTripCount = DAG.getSplatVector(VecTy, sdl, TripCount);
- } else {
- VectorIndex = DAG.getSplatBuildVector(VecTy, sdl, Index);
- VectorTripCount = DAG.getSplatBuildVector(VecTy, sdl, TripCount);
- }
+ SDValue VectorIndex = DAG.getSplat(VecTy, sdl, Index);
+ SDValue VectorTripCount = DAG.getSplat(VecTy, sdl, TripCount);
SDValue VectorStep = DAG.getStepVector(sdl, VecTy);
SDValue VectorInduction = DAG.getNode(
ISD::UADDSAT, sdl, VecTy, VectorIndex, VectorStep);
@@ -7253,11 +7331,6 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
const ConstrainedFPIntrinsic &FPI) {
SDLoc sdl = getCurSDLoc();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs);
- ValueVTs.push_back(MVT::Other); // Out chain
-
// We do not need to serialize constrained FP intrinsics against
// each other or against (nonvolatile) loads, so they can be
// chained like loads.
@@ -7286,7 +7359,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
// The only reason why ebIgnore nodes still need to be chained is that
// they might depend on the current rounding mode, and therefore must
// not be moved across instruction that may change that mode.
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case fp::ExceptionBehavior::ebMayTrap:
// These must not be moved across calls or instructions that may change
// floating-point exception masks.
@@ -7301,7 +7374,9 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
}
};
- SDVTList VTs = DAG.getVTList(ValueVTs);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), FPI.getType());
+ SDVTList VTs = DAG.getVTList(VT, MVT::Other);
fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
SDNodeFlags Flags;
@@ -7323,8 +7398,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
Opcode = ISD::STRICT_FMA;
// Break fmuladd into fmul and fadd.
if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict ||
- !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(),
- ValueVTs[0])) {
+ !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
Opers.pop_back();
SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags);
pushOutChain(Mul, EB);
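The choice driving the expansion started above, sketched in plain C++: the fused form of fmuladd rounds once, while the STRICT_FMUL + STRICT_FADD sequence rounds the product and the sum separately. Which form is used depends on the FP fusion option and on whether the target reports FMA as faster than a separate multiply and add. Illustrative only:

    #include <cmath>

    // Single rounding step (what STRICT_FMA provides).
    double fusedForm(double A, double B, double C) { return std::fma(A, B, C); }

    // Two rounding steps (the STRICT_FMUL + STRICT_FADD expansion).
    double expandedForm(double A, double B, double C) { return (A * B) + C; }
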
@@ -7365,8 +7439,18 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
}
static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
- Optional<unsigned> ResOPC;
+ std::optional<unsigned> ResOPC;
switch (VPIntrin.getIntrinsicID()) {
+ case Intrinsic::vp_ctlz: {
+ bool IsZeroUndef = cast<ConstantInt>(VPIntrin.getArgOperand(3))->isOne();
+ ResOPC = IsZeroUndef ? ISD::VP_CTLZ_ZERO_UNDEF : ISD::VP_CTLZ;
+ break;
+ }
+ case Intrinsic::vp_cttz: {
+ bool IsZeroUndef = cast<ConstantInt>(VPIntrin.getArgOperand(3))->isOne();
+ ResOPC = IsZeroUndef ? ISD::VP_CTTZ_ZERO_UNDEF : ISD::VP_CTTZ;
+ break;
+ }
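The is_zero_undef flag read from the intrinsic's last operand distinguishes the fully defined count-leading/trailing-zeros form from the *_ZERO_UNDEF form. A scalar sketch of the difference (illustrative only):

    #include <cstdint>

    // Fully defined for 0 (the plain VP_CTLZ form).
    uint32_t ctlz32(uint32_t X) {
      uint32_t N = 0;
      for (uint32_t Bit = 1u << 31; Bit != 0 && !(X & Bit); Bit >>= 1)
        ++N;
      return N; // returns 32 for X == 0
    }
    // With the zero-is-poison operand set, the caller promises X != 0, so the
    // lowering may use an instruction whose result for 0 is unspecified.
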
#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \
case Intrinsic::VPID: \
ResOPC = ISD::VPSD; \
@@ -7388,118 +7472,133 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
return *ResOPC;
}
-void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
- SmallVector<SDValue, 7> &OpValues,
- bool IsGather) {
+void SelectionDAGBuilder::visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
+ SmallVector<SDValue, 7> &OpValues) {
SDLoc DL = getCurSDLoc();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value *PtrOperand = VPIntrin.getArgOperand(0);
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
SDValue LD;
bool AddToChain = true;
- if (!IsGather) {
- // Do not serialize variable-length loads of constant memory with
- // anything.
- if (!Alignment)
- Alignment = DAG.getEVTAlign(VT);
- MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
- AddToChain = !AA || !AA->pointsToConstantMemory(ML);
- SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
- MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
- MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
- LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
- MMO, false /*IsExpanding */);
- } else {
- if (!Alignment)
- Alignment = DAG.getEVTAlign(VT.getScalarType());
- unsigned AS =
- PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
- MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(AS), MachineMemOperand::MOLoad,
- MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
- SDValue Base, Index, Scale;
- ISD::MemIndexType IndexType;
- bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
- this, VPIntrin.getParent(),
- VT.getScalarStoreSize());
- if (!UniformBase) {
- Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
- Index = getValue(PtrOperand);
- IndexType = ISD::SIGNED_SCALED;
- Scale =
- DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
- }
- EVT IdxVT = Index.getValueType();
- EVT EltTy = IdxVT.getVectorElementType();
- if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
- EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
- Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
- }
- LD = DAG.getGatherVP(
- DAG.getVTList(VT, MVT::Other), VT, DL,
- {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO,
- IndexType);
- }
+ // Do not serialize variable-length loads of constant memory with
+ // anything.
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT);
+ MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
+ AddToChain = !AA || !AA->pointsToConstantMemory(ML);
+ SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+ LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
+ MMO, false /*IsExpanding */);
if (AddToChain)
PendingLoads.push_back(LD.getValue(1));
setValue(&VPIntrin, LD);
}
-void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin,
- SmallVector<SDValue, 7> &OpValues,
- bool IsScatter) {
+void SelectionDAGBuilder::visitVPGather(const VPIntrinsic &VPIntrin, EVT VT,
+ SmallVector<SDValue, 7> &OpValues) {
SDLoc DL = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Value *PtrOperand = VPIntrin.getArgOperand(0);
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
+ SDValue LD;
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT.getScalarType());
+ unsigned AS =
+ PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+ SDValue Base, Index, Scale;
+ ISD::MemIndexType IndexType;
+ bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
+ this, VPIntrin.getParent(),
+ VT.getScalarStoreSize());
+ if (!UniformBase) {
+ Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(PtrOperand);
+ IndexType = ISD::SIGNED_SCALED;
+ Scale = DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+ EVT IdxVT = Index.getValueType();
+ EVT EltTy = IdxVT.getVectorElementType();
+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
+ }
+ LD = DAG.getGatherVP(
+ DAG.getVTList(VT, MVT::Other), VT, DL,
+ {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO,
+ IndexType);
+ PendingLoads.push_back(LD.getValue(1));
+ setValue(&VPIntrin, LD);
+}
+
+void SelectionDAGBuilder::visitVPStore(const VPIntrinsic &VPIntrin,
+ SmallVector<SDValue, 7> &OpValues) {
+ SDLoc DL = getCurSDLoc();
Value *PtrOperand = VPIntrin.getArgOperand(1);
EVT VT = OpValues[0].getValueType();
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
SDValue ST;
- if (!IsScatter) {
- if (!Alignment)
- Alignment = DAG.getEVTAlign(VT);
- SDValue Ptr = OpValues[1];
- SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
- MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
- MemoryLocation::UnknownSize, *Alignment, AAInfo);
- ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset,
- OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED,
- /* IsTruncating */ false, /*IsCompressing*/ false);
- } else {
- if (!Alignment)
- Alignment = DAG.getEVTAlign(VT.getScalarType());
- unsigned AS =
- PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
- MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(AS), MachineMemOperand::MOStore,
- MemoryLocation::UnknownSize, *Alignment, AAInfo);
- SDValue Base, Index, Scale;
- ISD::MemIndexType IndexType;
- bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
- this, VPIntrin.getParent(),
- VT.getScalarStoreSize());
- if (!UniformBase) {
- Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
- Index = getValue(PtrOperand);
- IndexType = ISD::SIGNED_SCALED;
- Scale =
- DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
- }
- EVT IdxVT = Index.getValueType();
- EVT EltTy = IdxVT.getVectorElementType();
- if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
- EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
- Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
- }
- ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL,
- {getMemoryRoot(), OpValues[0], Base, Index, Scale,
- OpValues[2], OpValues[3]},
- MMO, IndexType);
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT);
+ SDValue Ptr = OpValues[1];
+ SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo);
+ ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset,
+ OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED,
+ /* IsTruncating */ false, /*IsCompressing*/ false);
+ DAG.setRoot(ST);
+ setValue(&VPIntrin, ST);
+}
+
+void SelectionDAGBuilder::visitVPScatter(const VPIntrinsic &VPIntrin,
+ SmallVector<SDValue, 7> &OpValues) {
+ SDLoc DL = getCurSDLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Value *PtrOperand = VPIntrin.getArgOperand(1);
+ EVT VT = OpValues[0].getValueType();
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ SDValue ST;
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT.getScalarType());
+ unsigned AS =
+ PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo);
+ SDValue Base, Index, Scale;
+ ISD::MemIndexType IndexType;
+ bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
+ this, VPIntrin.getParent(),
+ VT.getScalarStoreSize());
+ if (!UniformBase) {
+ Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(PtrOperand);
+ IndexType = ISD::SIGNED_SCALED;
+ Scale =
+ DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+ EVT IdxVT = Index.getValueType();
+ EVT EltTy = IdxVT.getVectorElementType();
+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
}
+ ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL,
+ {getMemoryRoot(), OpValues[0], Base, Index, Scale,
+ OpValues[2], OpValues[3]},
+ MMO, IndexType);
DAG.setRoot(ST);
setValue(&VPIntrin, ST);
}
@@ -7626,20 +7725,78 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
break;
}
case ISD::VP_LOAD:
+ visitVPLoad(VPIntrin, ValueVTs[0], OpValues);
+ break;
case ISD::VP_GATHER:
- visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues,
- Opcode == ISD::VP_GATHER);
+ visitVPGather(VPIntrin, ValueVTs[0], OpValues);
break;
case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
visitVPStridedLoad(VPIntrin, ValueVTs[0], OpValues);
break;
case ISD::VP_STORE:
+ visitVPStore(VPIntrin, OpValues);
+ break;
case ISD::VP_SCATTER:
- visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER);
+ visitVPScatter(VPIntrin, OpValues);
break;
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
visitVPStridedStore(VPIntrin, OpValues);
break;
+ case ISD::VP_FMULADD: {
+ assert(OpValues.size() == 5 && "Unexpected number of operands");
+ SDNodeFlags SDFlags;
+ if (auto *FPMO = dyn_cast<FPMathOperator>(&VPIntrin))
+ SDFlags.copyFMF(*FPMO);
+ if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
+ TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), ValueVTs[0])) {
+ setValue(&VPIntrin, DAG.getNode(ISD::VP_FMA, DL, VTs, OpValues, SDFlags));
+ } else {
+ SDValue Mul = DAG.getNode(
+ ISD::VP_FMUL, DL, VTs,
+ {OpValues[0], OpValues[1], OpValues[3], OpValues[4]}, SDFlags);
+ SDValue Add =
+ DAG.getNode(ISD::VP_FADD, DL, VTs,
+ {Mul, OpValues[2], OpValues[3], OpValues[4]}, SDFlags);
+ setValue(&VPIntrin, Add);
+ }
+ break;
+ }
+ case ISD::VP_INTTOPTR: {
+ SDValue N = OpValues[0];
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), VPIntrin.getType());
+ EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), VPIntrin.getType());
+ N = DAG.getVPPtrExtOrTrunc(getCurSDLoc(), DestVT, N, OpValues[1],
+ OpValues[2]);
+ N = DAG.getVPZExtOrTrunc(getCurSDLoc(), PtrMemVT, N, OpValues[1],
+ OpValues[2]);
+ setValue(&VPIntrin, N);
+ break;
+ }
+ case ISD::VP_PTRTOINT: {
+ SDValue N = OpValues[0];
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ VPIntrin.getType());
+ EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(),
+ VPIntrin.getOperand(0)->getType());
+ N = DAG.getVPPtrExtOrTrunc(getCurSDLoc(), PtrMemVT, N, OpValues[1],
+ OpValues[2]);
+ N = DAG.getVPZExtOrTrunc(getCurSDLoc(), DestVT, N, OpValues[1],
+ OpValues[2]);
+ setValue(&VPIntrin, N);
+ break;
+ }
+ case ISD::VP_ABS:
+ case ISD::VP_CTLZ:
+ case ISD::VP_CTLZ_ZERO_UNDEF:
+ case ISD::VP_CTTZ:
+ case ISD::VP_CTTZ_ZERO_UNDEF: {
+ // Pop is_zero_poison operand for vp.ctlz/cttz or
+ // is_int_min_poison operand for vp.abs.
+ OpValues.pop_back();
+ SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
+ setValue(&VPIntrin, Result);
+ break;
+ }
}
}
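For the VP_FMULADD case above, a per-lane model of the unfused expansion (vp.fmul followed by vp.fadd) may help: OpValues[0..2] are the a, b, c operands, OpValues[3] the mask, and OpValues[4] the explicit vector length; disabled lanes produce an unspecified value (modelled as 0.0 in this sketch). Purely illustrative, not the DAG lowering itself:

    #include <cstddef>
    #include <vector>

    std::vector<double> vpFMulAdd(const std::vector<double> &A,
                                  const std::vector<double> &B,
                                  const std::vector<double> &C,
                                  const std::vector<bool> &Mask, size_t EVL) {
      std::vector<double> R(A.size(), 0.0); // disabled lanes modelled as 0.0
      for (size_t I = 0; I < A.size() && I < EVL; ++I)
        if (Mask[I])
          R[I] = A[I] * B[I] + C[I];
      return R;
    }
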
@@ -7820,6 +7977,17 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
if (TLI.supportSwiftError() && SwiftErrorVal)
isTailCall = false;
+ ConstantInt *CFIType = nullptr;
+ if (CB.isIndirectCall()) {
+ if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_kcfi)) {
+ if (!TLI.supportKCFIBundles())
+ report_fatal_error(
+ "Target doesn't support calls with kcfi operand bundles.");
+ CFIType = cast<ConstantInt>(Bundle->Inputs[0]);
+ assert(CFIType->getType()->isIntegerTy(32) && "Invalid CFI type");
+ }
+ }
+
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
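For the kcfi operand bundle handled above, the 32-bit CFI type id lets the backend guard indirect calls. The sketch below is only a conceptual model (in the KCFI scheme the callee's type id is typically stored just before its entry point, but the exact offset, encoding, and trap sequence are target-dependent and emitted by the backend, not by this code):

    #include <cstdint>
    #include <cstdlib>

    using FnTy = void (*)();

    void guardedIndirectCall(FnTy Fn, uint32_t ExpectedTypeId) {
      // Load the type id stored immediately before the callee's entry point;
      // a mismatch aborts before the call is made.
      const uintptr_t Addr = reinterpret_cast<uintptr_t>(Fn);
      const uint32_t ActualTypeId =
          *reinterpret_cast<const uint32_t *>(Addr - sizeof(uint32_t));
      if (ActualTypeId != ExpectedTypeId)
        std::abort();
      Fn();
    }
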
@@ -7827,7 +7995,8 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
.setTailCall(isTailCall)
.setConvergent(CB.isConvergent())
.setIsPreallocated(
- CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
+ CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0)
+ .setCFIType(CFIType);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
if (Result.first.getNode()) {
@@ -8200,9 +8369,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
return;
}
- if (Function *F = I.getCalledFunction()) {
- diagnoseDontCall(I);
+ diagnoseDontCall(I);
+ if (Function *F = I.getCalledFunction()) {
if (F->isDeclaration()) {
// Is this an LLVM intrinsic or a target-specific intrinsic?
unsigned IID = F->getIntrinsicID();
@@ -8371,7 +8540,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_funclet,
LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated,
- LLVMContext::OB_clang_arc_attachedcall}) &&
+ LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi}) &&
"Cannot lower calls with arbitrary operand bundles!");
SDValue Callee = getValue(I.getCalledOperand());
@@ -8499,7 +8668,7 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
///
/// OpInfo describes the operand
/// RefOpInfo describes the matching operand if any, the operand otherwise
-static llvm::Optional<unsigned>
+static std::optional<unsigned>
getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
SDISelAsmOperandInfo &OpInfo,
SDISelAsmOperandInfo &RefOpInfo) {
@@ -8513,7 +8682,7 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
// No work to do for memory/address operands.
if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
OpInfo.ConstraintType == TargetLowering::C_Address)
- return None;
+ return std::nullopt;
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
@@ -8523,7 +8692,7 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
&TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
// RC is unset only on failure. Return immediately.
if (!RC)
- return None;
+ return std::nullopt;
// Get the actual register value type. This is important, because the user
// may have asked for (e.g.) the AX register in i32 type. We need to
@@ -8568,7 +8737,7 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
// No need to allocate a matching input constraint since the constraint it's
// matching to has already been allocated.
if (OpInfo.isMatchingInputConstraint())
- return None;
+ return std::nullopt;
EVT ValueVT = OpInfo.ConstraintVT;
if (OpInfo.ConstraintVT == MVT::Other)
@@ -8606,7 +8775,7 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
}
OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
- return None;
+ return std::nullopt;
}
static unsigned
@@ -8665,6 +8834,23 @@ public:
} // end anonymous namespace
+static bool isFunction(SDValue Op) {
+ if (Op && Op.getOpcode() == ISD::GlobalAddress) {
+ if (auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ auto Fn = dyn_cast_or_null<Function>(GA->getGlobal());
+
+ // In normal "call dllimport func" instruction (non-inlineasm) it force
+ // indirect access by specifing call opcode. And usually specially print
+ // asm with indirect symbol (i.g: "*") according to opcode. Inline asm can
+ // not do in this way now. (In fact, this is similar with "Data Access"
+ // action). So here we ignore dllimport function.
+ if (Fn && !Fn->hasDLLImportStorageClass())
+ return true;
+ }
+ }
+ return false;
+}
+
/// visitInlineAsm - Handle a call to an InlineAsm object.
void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
const BasicBlock *EHPadBB) {
@@ -8713,7 +8899,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
// memory and is nonvolatile.
SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
- bool EmitEHLabels = isa<InvokeInst>(Call) && IA->canThrow();
+ bool EmitEHLabels = isa<InvokeInst>(Call);
if (EmitEHLabels) {
assert(EHPadBB && "InvokeInst must have an EHPadBB");
}
@@ -8731,8 +8917,15 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
Chain = lowerStartEH(Chain, EHPadBB, BeginLabel);
}
+ int OpNo = -1;
+ SmallVector<StringRef> AsmStrs;
+ IA->collectAsmStrs(AsmStrs);
+
// Second pass over the constraints: compute which constraint option to use.
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+ if (OpInfo.hasArg() || OpInfo.Type == InlineAsm::isOutput)
+ OpNo++;
+
// If this is an output operand with a matching input operand, look up the
// matching input. If their types mismatch, e.g. one is an integer, the
// other is floating point, or their sizes are different, flag it as an
@@ -8750,6 +8943,32 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
OpInfo.ConstraintType == TargetLowering::C_Address)
continue;
+ // In the Linux PIC model, there are 4 cases of value/label addressing:
+ //
+ // 1: Function call or Label jmp inside the module.
+ // 2: Data access (such as global variable, static variable) inside module.
+ // 3: Function call or Label jmp outside the module.
+ // 4: Data access (such as global variable) outside the module.
+ //
+ // Because the current LLVM inline asm design does not "recognize" the asm
+ // text itself, it is hard to treat memory addressing differently for the
+ // same value/address used in different instructions. For example, in the
+ // PIC model a function call may go through the PLT or be directly
+ // PC-relative, while a lea/mov of a function address may go through the GOT.
+ //
+ // Here we try to "recognize" function calls for cases 1 and 3 in inline
+ // asm, and adjust the constraint for them.
+ //
+ // TODO: Current inline asm discourages jumping to labels outside the
+ // module, so we do not handle jumps to function labels yet; this should be
+ // enhanced (especially for the PIC model) if meaningful use cases appear.
+ if (OpInfo.isIndirect && isFunction(OpInfo.CallOperand) &&
+ TLI.isInlineAsmTargetBranch(AsmStrs, OpNo) &&
+ TM.getCodeModel() != CodeModel::Large) {
+ OpInfo.isIndirect = false;
+ OpInfo.ConstraintType = TargetLowering::C_Address;
+ }
+
// If this is a memory input, and if the operand is not indirect, do what we
// need to provide an address for the memory input.
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
@@ -8800,7 +9019,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
if (RegError) {
const MachineFunction &MF = DAG.getMachineFunction();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- const char *RegName = TRI.getName(RegError.value());
+ const char *RegName = TRI.getName(*RegError);
emitInlineAsmError(Call, "register '" + Twine(RegName) +
"' allocated for constraint '" +
Twine(OpInfo.ConstraintCode) +
@@ -8959,8 +9178,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
break;
}
- if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
- OpInfo.ConstraintType == TargetLowering::C_Address) {
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
assert((OpInfo.isIndirect ||
OpInfo.ConstraintType != TargetLowering::C_Memory) &&
"Operand must be indirect to be a mem!");
@@ -8983,6 +9201,37 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
break;
}
+ if (OpInfo.ConstraintType == TargetLowering::C_Address) {
+ assert(InOperandVal.getValueType() ==
+ TLI.getPointerTy(DAG.getDataLayout()) &&
+ "Address operands expect pointer values");
+
+ unsigned ConstraintID =
+ TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
+ assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ "Failed to convert memory constraint code to constraint id.");
+
+ unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+
+ SDValue AsmOp = InOperandVal;
+ if (isFunction(InOperandVal)) {
+ auto *GA = cast<GlobalAddressSDNode>(InOperandVal);
+ ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Func, 1);
+ AsmOp = DAG.getTargetGlobalAddress(GA->getGlobal(), getCurSDLoc(),
+ InOperandVal.getValueType(),
+ GA->getOffset());
+ }
+
+ // Add information to the INLINEASM node to know about this input.
+ ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
+
+ AsmNodeOperands.push_back(
+ DAG.getTargetConstant(ResOpType, getCurSDLoc(), MVT::i32));
+
+ AsmNodeOperands.push_back(AsmOp);
+ break;
+ }
+
assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
OpInfo.ConstraintType == TargetLowering::C_Register) &&
"Unknown constraint type!");
@@ -9047,7 +9296,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
if (StructType *StructResult = dyn_cast<StructType>(CallResultType))
ResultTypes = StructResult->elements();
else if (!CallResultType->isVoidTy())
- ResultTypes = makeArrayRef(CallResultType);
+ ResultTypes = ArrayRef(CallResultType);
auto CurResultType = ResultTypes.begin();
auto handleRegAssign = [&](SDValue V) {
@@ -9327,12 +9576,11 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
- SDValue Chain, InFlag, Callee, NullPtr;
+ SDValue Chain, InFlag, Callee;
SmallVector<SDValue, 32> Ops;
SDLoc DL = getCurSDLoc();
Callee = getValue(CI.getCalledOperand());
- NullPtr = DAG.getIntPtrConstant(0, DL, true);
// The stackmap intrinsic only records the live variables (the arguments
// passed to it) and emits NOPS (if requested). Unlike the patchpoint
@@ -9375,7 +9623,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
Chain = DAG.getNode(ISD::STACKMAP, DL, NodeTys, Ops);
InFlag = Chain.getValue(1);
- Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL);
+ Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InFlag, DL);
// Stackmaps don't generate values, so nothing goes into the NodeMap.
@@ -9693,6 +9941,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Entry.Alignment = Alignment;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
CLI.NumFixedArgs += 1;
+ CLI.getArgs()[0].IndirectType = CLI.RetTy;
CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
// sret demotion isn't compatible with tail-calls, since the sret argument
@@ -9897,7 +10146,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ISD::OutputArg MyFlags(
Flags, Parts[j].getValueType().getSimpleVT(), VT,
i < CLI.NumFixedArgs, i,
- j * Parts[j].getValueType().getStoreSize().getKnownMinSize());
+ j * Parts[j].getValueType().getStoreSize().getKnownMinValue());
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0) {
@@ -9985,7 +10234,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
} else {
// Collect the legal value parts into potentially illegal values
// that correspond to the original function's return values.
- Optional<ISD::NodeType> AssertOp;
+ std::optional<ISD::NodeType> AssertOp;
if (CLI.RetSExt)
AssertOp = ISD::AssertSext;
else if (CLI.RetZExt)
@@ -10063,7 +10312,7 @@ void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V,
// notional registers required by the type.
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(),
- None); // This is not an ABI copy.
+ std::nullopt); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
if (ExtendType == ISD::ANY_EXTEND) {
@@ -10424,8 +10673,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// For scalable vectors, use the minimum size; individual targets
// are responsible for handling scalable vector arguments and
// return values.
- ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
- ArgNo, PartBase+i*RegisterVT.getStoreSize().getKnownMinSize());
+ ISD::InputArg MyFlags(
+ Flags, RegisterVT, VT, isArgValueUsed, ArgNo,
+ PartBase + i * RegisterVT.getStoreSize().getKnownMinValue());
if (NumRegs > 1 && i == 0)
MyFlags.Flags.setSplit();
// if it isn't first piece, alignment must be 1
@@ -10438,7 +10688,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
}
if (NeedsRegBlock && Value == NumValues - 1)
Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
- PartBase += VT.getStoreSize().getKnownMinSize();
+ PartBase += VT.getStoreSize().getKnownMinValue();
}
}
@@ -10476,7 +10726,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
ValueVTs);
MVT VT = ValueVTs[0].getSimpleVT();
MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
- Optional<ISD::NodeType> AssertOp = None;
+ std::optional<ISD::NodeType> AssertOp;
SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT,
nullptr, F.getCallingConv(), AssertOp);
@@ -10538,7 +10788,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// we do generate a copy for it that can be used on return from the
// function.
if (ArgHasUses || isSwiftErrorArg) {
- Optional<ISD::NodeType> AssertOp;
+ std::optional<ISD::NodeType> AssertOp;
if (Arg.hasAttribute(Attribute::SExt))
AssertOp = ISD::AssertSext;
else if (Arg.hasAttribute(Attribute::ZExt))
@@ -10561,7 +10811,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
- SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
+ SDValue Res = DAG.getMergeValues(ArrayRef(ArgValues.data(), NumValues),
SDB->getCurSDLoc());
SDB->setValue(&Arg, Res);
@@ -10644,14 +10894,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
void
SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- const Instruction *TI = LLVMBB->getTerminator();
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
// Check PHI nodes in successors that expect a value to be available from this
// block.
- for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
- const BasicBlock *SuccBB = TI->getSuccessor(succ);
+ for (const BasicBlock *SuccBB : successors(LLVMBB->getTerminator())) {
if (!isa<PHINode>(SuccBB->begin())) continue;
MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
@@ -10677,7 +10925,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
unsigned Reg;
const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
- if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
+ if (const auto *C = dyn_cast<Constant>(PHIOp)) {
unsigned &RegOut = ConstantsOut[C];
if (RegOut == 0) {
RegOut = FuncInfo.CreateRegs(C);
@@ -10708,10 +10956,9 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// the input for this MBB.
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs);
- for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
- EVT VT = ValueVTs[vti];
- unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
- for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+ for (EVT VT : ValueVTs) {
+ const unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
+ for (unsigned i = 0; i != NumRegisters; ++i)
FuncInfo.PHINodesToUpdate.push_back(
std::make_pair(&*MBBI++, Reg + i));
Reg += NumRegisters;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index d1915fd4e7ae..bf2111013461 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -33,6 +34,7 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
+#include <optional>
#include <utility>
#include <vector>
@@ -42,6 +44,7 @@ class AAResults;
class AllocaInst;
class AtomicCmpXchgInst;
class AtomicRMWInst;
+class AssumptionCache;
class BasicBlock;
class BranchInst;
class CallInst;
@@ -103,20 +106,68 @@ class SelectionDAGBuilder {
/// Helper type for DanglingDebugInfoMap.
class DanglingDebugInfo {
- const DbgValueInst* DI = nullptr;
- DebugLoc dl;
+ using DbgValTy = const DbgValueInst *;
+ using VarLocTy = const VarLocInfo *;
+ PointerUnion<DbgValTy, VarLocTy> Info;
unsigned SDNodeOrder = 0;
public:
DanglingDebugInfo() = default;
- DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO)
- : DI(di), dl(std::move(DL)), SDNodeOrder(SDNO) {}
-
- const DbgValueInst* getDI() { return DI; }
- DebugLoc getdl() { return dl; }
- unsigned getSDNodeOrder() { return SDNodeOrder; }
+ DanglingDebugInfo(const DbgValueInst *DI, unsigned SDNO)
+ : Info(DI), SDNodeOrder(SDNO) {}
+ DanglingDebugInfo(const VarLocInfo *VarLoc, unsigned SDNO)
+ : Info(VarLoc), SDNodeOrder(SDNO) {}
+
+ DILocalVariable *getVariable(const FunctionVarLocs *Locs) const {
+ if (Info.is<VarLocTy>())
+ return Locs->getDILocalVariable(Info.get<VarLocTy>()->VariableID);
+ return Info.get<DbgValTy>()->getVariable();
+ }
+ DIExpression *getExpression() const {
+ if (Info.is<VarLocTy>())
+ return Info.get<VarLocTy>()->Expr;
+ return Info.get<DbgValTy>()->getExpression();
+ }
+ Value *getVariableLocationOp(unsigned Idx) const {
+ assert(Idx == 0 && "Dangling variadic debug values not supported yet");
+ if (Info.is<VarLocTy>())
+ return Info.get<VarLocTy>()->V;
+ return Info.get<DbgValTy>()->getVariableLocationOp(Idx);
+ }
+ DebugLoc getDebugLoc() const {
+ if (Info.is<VarLocTy>())
+ return Info.get<VarLocTy>()->DL;
+ return Info.get<DbgValTy>()->getDebugLoc();
+ }
+ unsigned getSDNodeOrder() const { return SDNodeOrder; }
+
+ /// Helper for printing DanglingDebugInfo. This hoop-jumping is to
+ /// accommodate the fact that an argument is required for getVariable.
+ /// Call SelectionDAGBuilder::printDDI instead of using directly.
+ struct Print {
+ Print(const DanglingDebugInfo &DDI, const FunctionVarLocs *VarLocs)
+ : DDI(DDI), VarLocs(VarLocs) {}
+ const DanglingDebugInfo &DDI;
+ const FunctionVarLocs *VarLocs;
+ friend raw_ostream &operator<<(raw_ostream &OS,
+ const DanglingDebugInfo::Print &P) {
+ OS << "DDI(var=" << *P.DDI.getVariable(P.VarLocs)
+ << ", val= " << *P.DDI.getVariableLocationOp(0)
+ << ", expr=" << *P.DDI.getExpression()
+ << ", order=" << P.DDI.getSDNodeOrder()
+ << ", loc=" << P.DDI.getDebugLoc() << ")";
+ return OS;
+ }
+ };
};
+ /// Returns an object that defines `raw_ostream &operator<<` for printing.
+ /// Usage example:
+ //// errs() << printDDI(MyDanglingInfo) << " is dangling\n";
+ DanglingDebugInfo::Print printDDI(const DanglingDebugInfo &DDI) {
+ return DanglingDebugInfo::Print(DDI, DAG.getFunctionVarLocs());
+ }
+
/// Helper type for DanglingDebugInfoMap.
typedef std::vector<DanglingDebugInfo> DanglingDebugInfoVector;
@@ -191,6 +242,7 @@ public:
SelectionDAG &DAG;
AAResults *AA = nullptr;
+ AssumptionCache *AC = nullptr;
const TargetLibraryInfo *LibInfo;
class SDAGSwitchLowering : public SwitchCG::SwitchLowering {
@@ -244,7 +296,7 @@ public:
SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo),
SwiftError(swifterror) {}
- void init(GCFunctionInfo *gfi, AAResults *AA,
+ void init(GCFunctionInfo *gfi, AAResults *AA, AssumptionCache *AC,
const TargetLibraryInfo *li);
/// Clear out the current SelectionDAG and the associated state and prepare
@@ -296,8 +348,8 @@ public:
SDValue getCopyFromRegs(const Value *V, Type *Ty);
/// Register a dbg_value which relies on a Value which we have not yet seen.
- void addDanglingDebugInfo(const DbgValueInst *DI, DebugLoc DL,
- unsigned Order);
+ void addDanglingDebugInfo(const DbgValueInst *DI, unsigned Order);
+ void addDanglingDebugInfo(const VarLocInfo *VarLoc, unsigned Order);
/// If we have dangling debug info that describes \p Variable, or an
/// overlapping part of variable considering the \p Expr, then this method
@@ -317,8 +369,8 @@ public:
/// For a given list of Values, attempt to create and record a SDDbgValue in
/// the SelectionDAG.
bool handleDebugValue(ArrayRef<const Value *> Values, DILocalVariable *Var,
- DIExpression *Expr, DebugLoc CurDL, DebugLoc InstDL,
- unsigned Order, bool IsVariadic);
+ DIExpression *Expr, DebugLoc DbgLoc, unsigned Order,
+ bool IsVariadic);
/// Evict any dangling debug information, attempting to salvage it first.
void resolveOrClearDbgInfo();
@@ -567,10 +619,14 @@ private:
void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
- void visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
- SmallVector<SDValue, 7> &OpValues, bool IsGather);
- void visitVPStoreScatter(const VPIntrinsic &VPIntrin,
- SmallVector<SDValue, 7> &OpValues, bool IsScatter);
+ void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
+ SmallVector<SDValue, 7> &OpValues);
+ void visitVPStore(const VPIntrinsic &VPIntrin,
+ SmallVector<SDValue, 7> &OpValues);
+ void visitVPGather(const VPIntrinsic &VPIntrin, EVT VT,
+ SmallVector<SDValue, 7> &OpValues);
+ void visitVPScatter(const VPIntrinsic &VPIntrin,
+ SmallVector<SDValue, 7> &OpValues);
void visitVPStridedLoad(const VPIntrinsic &VPIntrin, EVT VT,
SmallVectorImpl<SDValue> &OpValues);
void visitVPStridedStore(const VPIntrinsic &VPIntrin,
@@ -680,14 +736,14 @@ struct RegsForValue {
/// Records if this value needs to be treated in an ABI dependant manner,
/// different to normal type legalization.
- Optional<CallingConv::ID> CallConv;
+ std::optional<CallingConv::ID> CallConv;
RegsForValue() = default;
RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt,
- Optional<CallingConv::ID> CC = None);
+ std::optional<CallingConv::ID> CC = std::nullopt);
RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL, unsigned Reg, Type *Ty,
- Optional<CallingConv::ID> CC);
+ std::optional<CallingConv::ID> CC);
bool isABIMangled() const { return CallConv.has_value(); }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 6ba01664e756..fe4261291fc5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -79,6 +79,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::DELETED_NODE: return "<<Deleted Node!>>";
#endif
case ISD::PREFETCH: return "Prefetch";
+ case ISD::MEMBARRIER: return "MemBarrier";
case ISD::ATOMIC_FENCE: return "AtomicFence";
case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return "AtomicCmpSwapWithSuccess";
@@ -95,6 +96,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
case ISD::ATOMIC_LOAD_FADD: return "AtomicLoadFAdd";
+ case ISD::ATOMIC_LOAD_UINC_WRAP:
+ return "AtomicLoadUIncWrap";
+ case ISD::ATOMIC_LOAD_UDEC_WRAP:
+ return "AtomicLoadUDecWrap";
case ISD::ATOMIC_LOAD: return "AtomicLoad";
case ISD::ATOMIC_STORE: return "AtomicStore";
case ISD::PCMARKER: return "PCMarker";
@@ -422,7 +427,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
return "call_alloc";
// Floating point environment manipulation
- case ISD::FLT_ROUNDS_: return "flt_rounds";
+ case ISD::GET_ROUNDING: return "get_rounding";
case ISD::SET_ROUNDING: return "set_rounding";
// Bit manipulation
@@ -1059,6 +1064,9 @@ LLVM_DUMP_METHOD void SDNode::dumprFull(const SelectionDAG *G) const {
void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
printr(OS, G);
+ // Under VerboseDAGDumping, divergence is always printed.
+ if (isDivergent() && !VerboseDAGDumping)
+ OS << " # D:1";
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
if (i) OS << ", "; else OS << " ";
printOperand(OS, G, getOperand(i));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index d46a0a23cca3..902f46115557 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
@@ -31,6 +32,7 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -61,6 +63,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -101,6 +104,7 @@
#include <iterator>
#include <limits>
#include <memory>
+#include <optional>
#include <string>
#include <utility>
#include <vector>
@@ -309,7 +313,8 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
// SelectionDAGISel code
//===----------------------------------------------------------------------===//
-SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL)
+SelectionDAGISel::SelectionDAGISel(char &ID, TargetMachine &tm,
+ CodeGenOpt::Level OL)
: MachineFunctionPass(ID), TM(tm), FuncInfo(new FunctionLoweringInfo()),
SwiftError(new SwiftErrorValueTracking()),
CurDAG(new SelectionDAG(tm, OL)),
@@ -336,9 +341,14 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<GCModuleInfo>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
AU.addRequired<BranchProbabilityInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ // AssignmentTrackingAnalysis only runs if assignment tracking is enabled for
+ // the module.
+ AU.addRequired<AssignmentTrackingAnalysis>();
+ AU.addPreserved<AssignmentTrackingAnalysis>();
if (OptLevel != CodeGenOpt::None)
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
@@ -382,8 +392,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Decide what flavour of variable location debug-info will be used, before
// we change the optimisation level.
- UseInstrRefDebugInfo = mf.useDebugInstrRef();
- CurDAG->useInstrRefDebugInfo(UseInstrRefDebugInfo);
+ bool InstrRef = mf.shouldUseDebugInstrRef();
+ mf.setUseDebugInstrRef(InstrRef);
// Reset the target options before resetting the optimization
// level below.
@@ -403,15 +413,21 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn);
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn);
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(mf.getFunction());
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
BlockFrequencyInfo *BFI = nullptr;
if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOpt::None)
BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
+ FunctionVarLocs const *FnVarLocs = nullptr;
+ if (isAssignmentTrackingEnabled(*Fn.getParent()))
+ FnVarLocs = getAnalysis<AssignmentTrackingAnalysis>().getResults();
+
LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
CurDAG->init(*MF, *ORE, this, LibInfo,
- getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI);
+ getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI,
+ FnVarLocs);
FuncInfo->set(Fn, *MF, CurDAG);
SwiftError->setFunction(*MF);
@@ -430,7 +446,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
else
AA = nullptr;
- SDB->init(GFI, AA, LibInfo);
+ SDB->init(GFI, AA, AC, LibInfo);
MF->setHasInlineAsm(false);
@@ -488,7 +504,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
To = J->second;
}
// Make sure the new register has a sufficiently constrained register class.
- if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To))
+ if (From.isVirtual() && To.isVirtual())
MRI.constrainRegClass(To, MRI.getRegClass(From));
// Replace it.
@@ -530,15 +546,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
LiveInMap.insert(LI);
// Insert DBG_VALUE instructions for function arguments to the entry block.
- bool InstrRef = MF->useDebugInstrRef();
for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
MachineInstr *MI = FuncInfo->ArgDbgValues[e - i - 1];
assert(MI->getOpcode() != TargetOpcode::DBG_VALUE_LIST &&
"Function parameters should not be described by DBG_VALUE_LIST.");
- bool hasFI = MI->getOperand(0).isFI();
+ bool hasFI = MI->getDebugOperand(0).isFI();
Register Reg =
- hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg();
- if (Register::isPhysicalRegister(Reg))
+ hasFI ? TRI.getFrameRegister(*MF) : MI->getDebugOperand(0).getReg();
+ if (Reg.isPhysical())
EntryMBB->insert(EntryMBB->begin(), MI);
else {
MachineInstr *Def = RegInfo->getVRegDef(Reg);
@@ -567,7 +582,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
DebugLoc DL = MI->getDebugLoc();
bool IsIndirect = MI->isIndirectDebugValue();
if (IsIndirect)
- assert(MI->getOperand(1).getImm() == 0 &&
+ assert(MI->getDebugOffset().getImm() == 0 &&
"DBG_VALUE with nonzero offset");
assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
@@ -608,7 +623,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// For debug-info, in instruction referencing mode, we need to perform some
// post-isel maintenence.
- if (UseInstrRefDebugInfo)
+ if (MF->useDebugInstrRef())
MF->finalizeDebugInstrRefs();
// Determine if there are any calls in this machine function.
@@ -997,6 +1012,15 @@ public:
if (ISelPosition == SelectionDAG::allnodes_iterator(N))
++ISelPosition;
}
+
+ /// NodeInserted - Handle new nodes inserted into the graph: propagate
+ /// metadata from root nodes that also applies to new nodes, in case the root
+ /// is later deleted.
+ void NodeInserted(SDNode *N) override {
+ SDNode *CurNode = &*ISelPosition;
+ if (MDNode *MD = DAG.getPCSections(CurNode))
+ DAG.addPCSections(N, MD);
+ }
};
} // end anonymous namespace
@@ -1073,7 +1097,7 @@ void SelectionDAGISel::DoInstructionSelection() {
++ISelPosition;
// Make sure that ISelPosition gets properly updated when nodes are deleted
- // in calls made from this function.
+ // in calls made from this function. New nodes inherit relevant metadata.
ISelUpdater ISU(*CurDAG, ISelPosition);
// The AllNodes list is now topological-sorted. Visit the
@@ -1181,11 +1205,11 @@ static void mapWasmLandingPadIndex(MachineBasicBlock *MBB,
// In case of single catch (...), we don't emit LSDA, so we don't need
// this information.
bool IsSingleCatchAllClause =
- CPI->getNumArgOperands() == 1 &&
+ CPI->arg_size() == 1 &&
cast<Constant>(CPI->getArgOperand(0))->isNullValue();
// cathchpads for longjmp use an empty type list, e.g. catchpad within %0 []
// and they don't need LSDA info
- bool IsCatchLongjmp = CPI->getNumArgOperands() == 0;
+ bool IsCatchLongjmp = CPI->arg_size() == 0;
if (!IsSingleCatchAllClause && !IsCatchLongjmp) {
// Create a mapping from landing pad label to landing pad index.
bool IntrFound = false;
@@ -1279,56 +1303,75 @@ static bool isFoldedOrDeadInstruction(const Instruction *I,
!FuncInfo.isExportedInst(I); // Exported instrs must be computed.
}
+static void processDbgDeclare(FunctionLoweringInfo &FuncInfo,
+ const Value *Address, DIExpression *Expr,
+ DILocalVariable *Var, DebugLoc DbgLoc) {
+ MachineFunction *MF = FuncInfo.MF;
+ const DataLayout &DL = MF->getDataLayout();
+
+ assert(Var && "Missing variable");
+ assert(DbgLoc && "Missing location");
+
+ // Look through casts and constant offset GEPs. These mostly come from
+ // inalloca.
+ APInt Offset(DL.getTypeSizeInBits(Address->getType()), 0);
+ Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
+
+ // Check if the variable is a static alloca or a byval or inalloca
+ // argument passed in memory. If it is not, then we will ignore this
+ // intrinsic and handle this during isel like dbg.value.
+ int FI = std::numeric_limits<int>::max();
+ if (const auto *AI = dyn_cast<AllocaInst>(Address)) {
+ auto SI = FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
+ FI = SI->second;
+ } else if (const auto *Arg = dyn_cast<Argument>(Address))
+ FI = FuncInfo.getArgumentFrameIndex(Arg);
+
+ if (FI == std::numeric_limits<int>::max())
+ return;
+
+ if (Offset.getBoolValue())
+ Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset,
+ Offset.getZExtValue());
+
+ LLVM_DEBUG(dbgs() << "processDbgDeclare: setVariableDbgInfo Var=" << *Var
+ << ", Expr=" << *Expr << ", FI=" << FI
+ << ", DbgLoc=" << DbgLoc << "\n");
+ MF->setVariableDbgInfo(Var, Expr, FI, DbgLoc);
+}
+
/// Collect llvm.dbg.declare information. This is done after argument lowering
/// in case the declarations refer to arguments.
static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) {
- MachineFunction *MF = FuncInfo.MF;
- const DataLayout &DL = MF->getDataLayout();
for (const BasicBlock &BB : *FuncInfo.Fn) {
for (const Instruction &I : BB) {
- const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I);
- if (!DI)
- continue;
-
- assert(DI->getVariable() && "Missing variable");
- assert(DI->getDebugLoc() && "Missing location");
- const Value *Address = DI->getAddress();
- if (!Address) {
- LLVM_DEBUG(dbgs() << "processDbgDeclares skipping " << *DI
- << " (bad address)\n");
- continue;
+ if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I)) {
+ Value *Address = DI->getAddress();
+ if (!Address) {
+ LLVM_DEBUG(dbgs() << "processDbgDeclares skipping " << *DI
+ << " (bad address)\n");
+ continue;
+ }
+ processDbgDeclare(FuncInfo, Address, DI->getExpression(),
+ DI->getVariable(), DI->getDebugLoc());
}
-
- // Look through casts and constant offset GEPs. These mostly come from
- // inalloca.
- APInt Offset(DL.getTypeSizeInBits(Address->getType()), 0);
- Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
-
- // Check if the variable is a static alloca or a byval or inalloca
- // argument passed in memory. If it is not, then we will ignore this
- // intrinsic and handle this during isel like dbg.value.
- int FI = std::numeric_limits<int>::max();
- if (const auto *AI = dyn_cast<AllocaInst>(Address)) {
- auto SI = FuncInfo.StaticAllocaMap.find(AI);
- if (SI != FuncInfo.StaticAllocaMap.end())
- FI = SI->second;
- } else if (const auto *Arg = dyn_cast<Argument>(Address))
- FI = FuncInfo.getArgumentFrameIndex(Arg);
-
- if (FI == std::numeric_limits<int>::max())
- continue;
-
- DIExpression *Expr = DI->getExpression();
- if (Offset.getBoolValue())
- Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset,
- Offset.getZExtValue());
- LLVM_DEBUG(dbgs() << "processDbgDeclares: setVariableDbgInfo FI=" << FI
- << ", " << *DI << "\n");
- MF->setVariableDbgInfo(DI->getVariable(), Expr, FI, DI->getDebugLoc());
}
}
}
+/// Collect single location variable information generated with assignment
+/// tracking. This is done after argument lowering in case the declarations
+/// refer to arguments.
+static void processSingleLocVars(FunctionLoweringInfo &FuncInfo,
+ FunctionVarLocs const *FnVarLocs) {
+ for (auto It = FnVarLocs->single_locs_begin(),
+ End = FnVarLocs->single_locs_end();
+ It != End; ++It)
+ processDbgDeclare(FuncInfo, It->V, It->Expr,
+ FnVarLocs->getDILocalVariable(It->VariableID), It->DL);
+}
+
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastISelFailed = false;
// Initialize the Fast-ISel state, if needed.
@@ -1336,8 +1379,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (TM.Options.EnableFastISel) {
LLVM_DEBUG(dbgs() << "Enabling fast-isel\n");
FastIS = TLI->createFastISel(*FuncInfo, LibInfo);
- if (FastIS)
- FastIS->useInstrRefDebugInfo(UseInstrRefDebugInfo);
}
ReversePostOrderTraversal<const Function*> RPOT(&Fn);
@@ -1391,7 +1432,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (FastIS && Inserted)
FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
- processDbgDeclares(*FuncInfo);
+ if (isAssignmentTrackingEnabled(*Fn.getParent())) {
+ assert(CurDAG->getFunctionVarLocs() &&
+ "expected AssignmentTrackingAnalysis pass results");
+ processSingleLocVars(*FuncInfo, CurDAG->getFunctionVarLocs());
+ } else {
+ processDbgDeclares(*FuncInfo);
+ }
// Iterate over all basic blocks in the function.
StackProtector &SP = getAnalysis<StackProtector>();
@@ -1957,7 +2004,7 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
while (i != e) {
unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
- if (!InlineAsm::isMemKind(Flags)) {
+ if (!InlineAsm::isMemKind(Flags) && !InlineAsm::isFuncKind(Flags)) {
// Just skip over this operand, copying the operands verbatim.
Ops.insert(Ops.end(), InOps.begin()+i,
InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
@@ -1986,7 +2033,9 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
// Add this to the output node.
unsigned NewFlags =
- InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size());
+ InlineAsm::isMemKind(Flags)
+ ? InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size())
+ : InlineAsm::getFlagWord(InlineAsm::Kind_Func, SelOps.size());
NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID);
Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32));
llvm::append_range(Ops, SelOps);
@@ -2193,6 +2242,11 @@ void SelectionDAGISel::Select_ARITH_FENCE(SDNode *N) {
N->getOperand(0));
}
+void SelectionDAGISel::Select_MEMBARRIER(SDNode *N) {
+ CurDAG->SelectNodeTo(N, TargetOpcode::MEMBARRIER, N->getValueType(0),
+ N->getOperand(0));
+}
+
void SelectionDAGISel::pushStackMapLiveVariable(SmallVectorImpl<SDValue> &Ops,
SDValue OpVal, SDLoc DL) {
SDNode *OpNode = OpVal.getNode();
@@ -2249,7 +2303,7 @@ void SelectionDAGISel::Select_PATCHPOINT(SDNode *N) {
// Cache arguments that will be moved to the end in the target node.
SDValue Chain = *It++;
- Optional<SDValue> Glue;
+ std::optional<SDValue> Glue;
if (It->getValueType() == MVT::Glue)
Glue = *It++;
SDValue RegMask = *It++;
@@ -2287,7 +2341,7 @@ void SelectionDAGISel::Select_PATCHPOINT(SDNode *N) {
Ops.push_back(RegMask);
Ops.push_back(Chain);
if (Glue.has_value())
- Ops.push_back(Glue.value());
+ Ops.push_back(*Glue);
SDVTList NodeTys = N->getVTList();
CurDAG->SelectNodeTo(N, TargetOpcode::PATCHPOINT, NodeTys, Ops);
@@ -2847,6 +2901,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::ARITH_FENCE:
Select_ARITH_FENCE(NodeToMatch);
return;
+ case ISD::MEMBARRIER:
+ Select_MEMBARRIER(NodeToMatch);
+ return;
case ISD::STACKMAP:
Select_STACKMAP(NodeToMatch);
return;
@@ -3764,5 +3821,3 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) {
}
report_fatal_error(Twine(Msg.str()));
}
-
-char SelectionDAGISel::ID = 0;
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index c5c093ae228f..57bfe344dbab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -14,8 +14,6 @@
#include "StatepointLowering.h"
#include "SelectionDAGBuilder.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
@@ -160,12 +158,12 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
/// Utility function for reservePreviousStackSlotForValue. Tries to find
/// stack slot index to which we have spilled value for previous statepoints.
/// LookUpDepth specifies maximum DFS depth this function is allowed to look.
-static Optional<int> findPreviousSpillSlot(const Value *Val,
- SelectionDAGBuilder &Builder,
- int LookUpDepth) {
+static std::optional<int> findPreviousSpillSlot(const Value *Val,
+ SelectionDAGBuilder &Builder,
+ int LookUpDepth) {
// Can not look any further - give up now
if (LookUpDepth <= 0)
- return None;
+ return std::nullopt;
// Spill location is known for gc relocates
if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) {
@@ -173,18 +171,18 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
assert((isa<GCStatepointInst>(Statepoint) || isa<UndefValue>(Statepoint)) &&
"GetStatepoint must return one of two types");
if (isa<UndefValue>(Statepoint))
- return None;
+ return std::nullopt;
const auto &RelocationMap = Builder.FuncInfo.StatepointRelocationMaps
[cast<GCStatepointInst>(Statepoint)];
auto It = RelocationMap.find(Relocate);
if (It == RelocationMap.end())
- return None;
+ return std::nullopt;
auto &Record = It->second;
if (Record.type != RecordType::Spill)
- return None;
+ return std::nullopt;
return Record.payload.FI;
}
@@ -197,16 +195,16 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
// All incoming values should have same known stack slot, otherwise result
// is unknown.
if (const PHINode *Phi = dyn_cast<PHINode>(Val)) {
- Optional<int> MergedResult = None;
+ std::optional<int> MergedResult;
for (const auto &IncomingValue : Phi->incoming_values()) {
- Optional<int> SpillSlot =
+ std::optional<int> SpillSlot =
findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1);
if (!SpillSlot)
- return None;
+ return std::nullopt;
if (MergedResult && *MergedResult != *SpillSlot)
- return None;
+ return std::nullopt;
MergedResult = SpillSlot;
}
@@ -241,7 +239,7 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
// which we visit values is unspecified.
// Don't know any information about this instruction
- return None;
+ return std::nullopt;
}
/// Return true if-and-only-if the given SDValue can be lowered as either a
@@ -284,7 +282,7 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue,
return;
const int LookUpDepth = 6;
- Optional<int> Index =
+ std::optional<int> Index =
findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth);
if (!Index)
return;
@@ -321,7 +319,7 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue,
/// reference lowered call result
static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo(
SelectionDAGBuilder::StatepointLoweringInfo &SI,
- SelectionDAGBuilder &Builder, SmallVectorImpl<SDValue> &PendingExports) {
+ SelectionDAGBuilder &Builder) {
SDValue ReturnValue, CallEndVal;
std::tie(ReturnValue, CallEndVal) =
Builder.lowerInvokable(SI.CLI, SI.EHPadBB);
@@ -526,34 +524,6 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
SelectionDAGBuilder &Builder) {
// Lower the deopt and gc arguments for this statepoint. Layout will be:
// deopt argument length, deopt arguments.., gc arguments...
-#ifndef NDEBUG
- if (auto *GFI = Builder.GFI) {
- // Check that each of the gc pointer and bases we've gotten out of the
- // safepoint is something the strategy thinks might be a pointer (or vector
- // of pointers) into the GC heap. This is basically just here to help catch
- // errors during statepoint insertion. TODO: This should actually be in the
- // Verifier, but we can't get to the GCStrategy from there (yet).
- GCStrategy &S = GFI->getStrategy();
- for (const Value *V : SI.Bases) {
- auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
- if (Opt) {
- assert(Opt.value() &&
- "non gc managed base pointer found in statepoint");
- }
- }
- for (const Value *V : SI.Ptrs) {
- auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
- if (Opt) {
- assert(Opt.value() &&
- "non gc managed derived pointer found in statepoint");
- }
- }
- assert(SI.Bases.size() == SI.Ptrs.size() && "Pointer without base!");
- } else {
- assert(SI.Bases.empty() && "No gc specified, so cannot relocate pointers!");
- assert(SI.Ptrs.empty() && "No gc specified, so cannot relocate pointers!");
- }
-#endif
// Figure out what lowering strategy we're going to use for each part
// Note: Is is conservatively correct to lower both "live-in" and "live-out"
@@ -742,7 +712,9 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
NumOfStatepoints++;
// Clear state
StatepointLowering.startNewStatepoint(*this);
- assert(SI.Bases.size() == SI.Ptrs.size());
+ assert(SI.Bases.size() == SI.Ptrs.size() && "Pointer without base!");
+ assert((GFI || SI.Bases.empty()) &&
+ "No gc specified, so cannot relocate pointers!");
LLVM_DEBUG(dbgs() << "Lowering statepoint " << *SI.StatepointInstr << "\n");
#ifndef NDEBUG
@@ -770,8 +742,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
// Get call node, we will replace it later with statepoint
SDValue ReturnVal;
SDNode *CallNode;
- std::tie(ReturnVal, CallNode) =
- lowerCallFromStatepointLoweringInfo(SI, *this, PendingExports);
+ std::tie(ReturnVal, CallNode) = lowerCallFromStatepointLoweringInfo(SI, *this);
// Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END
// nodes with all the appropriate arguments and return values.
@@ -921,7 +892,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
auto *RetTy = Relocate->getType();
Register Reg = FuncInfo.CreateRegs(RetTy);
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
- DAG.getDataLayout(), Reg, RetTy, None);
+ DAG.getDataLayout(), Reg, RetTy, std::nullopt);
SDValue Chain = DAG.getRoot();
RFV.getCopyToRegs(Relocated, DAG, getCurSDLoc(), Chain, nullptr);
PendingExports.push_back(Chain);
@@ -1148,7 +1119,7 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
// TODO: To eliminate this problem we can remove gc.result intrinsics
// completely and make statepoint call to return a tuple.
Type *RetTy = GCResultLocality.second->getType();
- unsigned Reg = FuncInfo.CreateRegs(RetTy);
+ Register Reg = FuncInfo.CreateRegs(RetTy);
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
DAG.getDataLayout(), Reg, RetTy,
I.getCallingConv());
@@ -1239,10 +1210,6 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
if (cast<GCStatepointInst>(Statepoint)->getParent() == Relocate.getParent())
StatepointLowering.relocCallVisited(Relocate);
-
- auto *Ty = Relocate.getType()->getScalarType();
- if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty))
- assert(*IsManaged && "Non gc managed pointer relocated!");
#endif
const Value *DerivedPtr = Relocate.getDerivedPtr();
@@ -1266,7 +1233,7 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
Register InReg = Record.payload.Reg;
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
DAG.getDataLayout(), InReg, Relocate.getType(),
- None); // This is not an ABI copy.
+ std::nullopt); // This is not an ABI copy.
// We generate copy to/from regs even for local uses, hence we must
// chain with current root to ensure proper ordering of copies w.r.t.
// statepoint.
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 6205e74837c0..8d4c8802f71c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -12,6 +12,7 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -260,7 +261,7 @@ bool TargetLowering::findOptimalMemOpLowering(
// If the new VT cannot cover all of the remaining bits, then consider
// issuing a (or a pair of) unaligned and overlapping load / store.
- bool Fast;
+ unsigned Fast;
if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
allowsMisalignedMemoryAccesses(
VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
@@ -351,7 +352,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
break;
case ISD::SETO:
ShouldInvertCC = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETUO:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
@@ -360,7 +361,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
case ISD::SETONE:
// SETONE = O && UNE
ShouldInvertCC = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETUEQ:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
@@ -397,7 +398,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
}
}
- // Use the target specific return value for comparions lib calls.
+ // Use the target specific return value for comparison lib calls.
EVT RetVT = getCmpLibcallReturnType();
SDValue Ops[2] = {NewLHS, NewRHS};
TargetLowering::MakeLibCallOptions CallOptions;
@@ -633,35 +634,22 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
bool AssumeSingleUse) const {
EVT VT = Op.getValueType();
- // TODO: We can probably do more work on calculating the known bits and
- // simplifying the operations for scalable vectors, but for now we just
- // bail out.
- if (VT.isScalableVector()) {
- // Pretend we don't know anything for now.
- Known = KnownBits(DemandedBits.getBitWidth());
- return false;
- }
-
- APInt DemandedElts = VT.isVector()
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts = VT.isFixedLengthVector()
? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
AssumeSingleUse);
}
-// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
SelectionDAG &DAG, unsigned Depth) const {
EVT VT = Op.getValueType();
- // Pretend we don't know anything about scalable vectors for now.
- // TODO: We can probably do more work on simplifying the operations for
- // scalable vectors, but for now we just bail out.
- if (VT.isScalableVector())
- return SDValue();
-
// Limit search depth.
if (Depth >= SelectionDAG::MaxRecursionDepth)
return SDValue();
@@ -680,6 +668,9 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
KnownBits LHSKnown, RHSKnown;
switch (Op.getOpcode()) {
case ISD::BITCAST: {
+ if (VT.isScalableVector())
+ return SDValue();
+
SDValue Src = peekThroughBitcasts(Op.getOperand(0));
EVT SrcVT = Src.getValueType();
EVT DstVT = Op.getValueType();
@@ -825,6 +816,9 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ if (VT.isScalableVector())
+ return SDValue();
+
// If we only want the lowest element and none of extended bits, then we can
// return the bitcasted source vector.
SDValue Src = Op.getOperand(0);
@@ -838,6 +832,9 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
break;
}
case ISD::INSERT_VECTOR_ELT: {
+ if (VT.isScalableVector())
+ return SDValue();
+
// If we don't demand the inserted element, return the base vector.
SDValue Vec = Op.getOperand(0);
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
@@ -848,6 +845,9 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
break;
}
case ISD::INSERT_SUBVECTOR: {
+ if (VT.isScalableVector())
+ return SDValue();
+
SDValue Vec = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
uint64_t Idx = Op.getConstantOperandVal(2);
@@ -857,6 +857,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
if (DemandedSubElts == 0)
return Vec;
// If this simply widens the lowest subvector, see if we can do it earlier.
+ // TODO: REMOVE ME - SimplifyMultipleUseDemandedBits shouldn't be creating
+ // general nodes like this.
if (Idx == 0 && Vec.isUndef()) {
if (SDValue NewSub = SimplifyMultipleUseDemandedBits(
Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1))
@@ -866,6 +868,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
break;
}
case ISD::VECTOR_SHUFFLE: {
+ assert(!VT.isScalableVector());
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
// If all the demanded elts are from one operand and are inline,
@@ -889,6 +892,11 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
break;
}
default:
+ // TODO: Probably okay to remove after audit; here to reduce change size
+ // in initial enablement patch for scalable vectors
+ if (VT.isScalableVector())
+ return SDValue();
+
if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
Op, DemandedBits, DemandedElts, DAG, Depth))
@@ -902,14 +910,10 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
unsigned Depth) const {
EVT VT = Op.getValueType();
-
- // Pretend we don't know anything about scalable vectors for now.
- // TODO: We can probably do more work on simplifying the operations for
- // scalable vectors, but for now we just bail out.
- if (VT.isScalableVector())
- return SDValue();
-
- APInt DemandedElts = VT.isVector()
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts = VT.isFixedLengthVector()
? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
@@ -1068,16 +1072,10 @@ bool TargetLowering::SimplifyDemandedBits(
// Don't know anything.
Known = KnownBits(BitWidth);
- // TODO: We can probably do more work on calculating the known bits and
- // simplifying the operations for scalable vectors, but for now we just
- // bail out.
EVT VT = Op.getValueType();
- if (VT.isScalableVector())
- return false;
-
bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
unsigned NumElts = OriginalDemandedElts.getBitWidth();
- assert((!VT.isVector() || NumElts == VT.getVectorNumElements()) &&
+ assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
"Unexpected vector size");
APInt DemandedBits = OriginalDemandedBits;
@@ -1089,6 +1087,10 @@ bool TargetLowering::SimplifyDemandedBits(
if (Op.isUndef())
return false;
+ // We can't simplify target constants.
+ if (Op.getOpcode() == ISD::TargetConstant)
+ return false;
+
if (Op.getOpcode() == ISD::Constant) {
// We know all of the bits for a constant!
Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
@@ -1103,17 +1105,16 @@ bool TargetLowering::SimplifyDemandedBits(
}
// Other users may use these bits.
- if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
- if (Depth != 0) {
- // If not at the root, Just compute the Known bits to
- // simplify things downstream.
- Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ bool HasMultiUse = false;
+ if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
+ if (Depth >= SelectionDAG::MaxRecursionDepth) {
+ // Limit search depth.
return false;
}
- // If this is the root being simplified, allow it to have multiple uses,
- // just set the DemandedBits/Elts to all bits.
+ // Allow multiple uses, just set the DemandedBits/Elts to all bits.
DemandedBits = APInt::getAllOnes(BitWidth);
DemandedElts = APInt::getAllOnes(NumElts);
+ HasMultiUse = true;
} else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
// Not demanding any bits/elts from Op.
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
@@ -1124,9 +1125,9 @@ bool TargetLowering::SimplifyDemandedBits(
KnownBits Known2;
switch (Op.getOpcode()) {
- case ISD::TargetConstant:
- llvm_unreachable("Can't simplify this node");
case ISD::SCALAR_TO_VECTOR: {
+ if (VT.isScalableVector())
+ return false;
if (!DemandedElts[0])
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
@@ -1164,6 +1165,8 @@ bool TargetLowering::SimplifyDemandedBits(
break;
}
case ISD::INSERT_VECTOR_ELT: {
+ if (VT.isScalableVector())
+ return false;
SDValue Vec = Op.getOperand(0);
SDValue Scl = Op.getOperand(1);
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
@@ -1200,6 +1203,8 @@ bool TargetLowering::SimplifyDemandedBits(
return false;
}
case ISD::INSERT_SUBVECTOR: {
+ if (VT.isScalableVector())
+ return false;
// Demand any elements from the subvector and the remainder from the src its
// inserted into.
SDValue Src = Op.getOperand(0);
@@ -1243,6 +1248,8 @@ bool TargetLowering::SimplifyDemandedBits(
break;
}
case ISD::EXTRACT_SUBVECTOR: {
+ if (VT.isScalableVector())
+ return false;
// Offset the demanded elts by the subvector index.
SDValue Src = Op.getOperand(0);
if (Src.getValueType().isScalableVector())
@@ -1268,6 +1275,8 @@ bool TargetLowering::SimplifyDemandedBits(
break;
}
case ISD::CONCAT_VECTORS: {
+ if (VT.isScalableVector())
+ return false;
Known.Zero.setAllBits();
Known.One.setAllBits();
EVT SubVT = Op.getOperand(0).getValueType();
@@ -1286,28 +1295,14 @@ bool TargetLowering::SimplifyDemandedBits(
break;
}
case ISD::VECTOR_SHUFFLE: {
+ assert(!VT.isScalableVector());
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
// Collect demanded elements from shuffle operands..
- APInt DemandedLHS(NumElts, 0);
- APInt DemandedRHS(NumElts, 0);
- for (unsigned i = 0; i != NumElts; ++i) {
- if (!DemandedElts[i])
- continue;
- int M = ShuffleMask[i];
- if (M < 0) {
- // For UNDEF elements, we don't know anything about the common state of
- // the shuffle result.
- DemandedLHS.clearAllBits();
- DemandedRHS.clearAllBits();
- break;
- }
- assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
- if (M < (int)NumElts)
- DemandedLHS.setBit(M);
- else
- DemandedRHS.setBit(M - NumElts);
- }
+ APInt DemandedLHS, DemandedRHS;
+ if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
+ DemandedRHS))
+ break;
if (!!DemandedLHS || !!DemandedRHS) {
SDValue Op0 = Op.getOperand(0);
@@ -1378,7 +1373,7 @@ bool TargetLowering::SimplifyDemandedBits(
// AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
// iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
- if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
(Op0.getOperand(0).isUndef() ||
ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
Op0->hasOneUse()) {
@@ -1745,7 +1740,7 @@ bool TargetLowering::SimplifyDemandedBits(
// aren't demanded (as above) and that the shifted upper c1 bits of
// x aren't demanded.
// TODO - support non-uniform vector amounts.
- if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
+ if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
InnerOp.hasOneUse()) {
if (const APInt *SA2 =
TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
@@ -1879,6 +1874,16 @@ bool TargetLowering::SimplifyDemandedBits(
Known.One.lshrInPlace(ShAmt);
// High bits known zero.
Known.Zero.setHighBits(ShAmt);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0) {
+ SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
}
break;
}
@@ -2081,10 +2086,10 @@ bool TargetLowering::SimplifyDemandedBits(
KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
Known = KnownBits::umin(Known0, Known1);
- if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
- return TLO.CombineTo(Op, IsULE.value() ? Op0 : Op1);
- if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
- return TLO.CombineTo(Op, IsULT.value() ? Op0 : Op1);
+ if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
+ return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
+ if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
+ return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
break;
}
case ISD::UMAX: {
@@ -2094,10 +2099,10 @@ bool TargetLowering::SimplifyDemandedBits(
KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
Known = KnownBits::umax(Known0, Known1);
- if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
- return TLO.CombineTo(Op, IsUGE.value() ? Op0 : Op1);
- if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
- return TLO.CombineTo(Op, IsUGT.value() ? Op0 : Op1);
+ if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
+ return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
+ if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
+ return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
break;
}
case ISD::BITREVERSE: {
@@ -2225,19 +2230,18 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
return true;
- Known.Zero = KnownLo.Zero.zext(BitWidth) |
- KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
-
- Known.One = KnownLo.One.zext(BitWidth) |
- KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
+ Known = KnownHi.concat(KnownLo);
break;
}
- case ISD::ZERO_EXTEND:
- case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ if (VT.isScalableVector())
+ return false;
+ [[fallthrough]];
+ case ISD::ZERO_EXTEND: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
- unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
// If none of the top bits are demanded, convert this into an any_extend.
@@ -2269,12 +2273,15 @@ bool TargetLowering::SimplifyDemandedBits(
return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
- case ISD::SIGN_EXTEND:
- case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ if (VT.isScalableVector())
+ return false;
+ [[fallthrough]];
+ case ISD::SIGN_EXTEND: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
- unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
// If none of the top bits are demanded, convert this into an any_extend.
@@ -2321,12 +2328,15 @@ bool TargetLowering::SimplifyDemandedBits(
return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
- case ISD::ANY_EXTEND:
- case ISD::ANY_EXTEND_VECTOR_INREG: {
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ if (VT.isScalableVector())
+ return false;
+ [[fallthrough]];
+ case ISD::ANY_EXTEND: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
- unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
// If we only need the bottom element then we can just bitcast.
@@ -2369,18 +2379,18 @@ bool TargetLowering::SimplifyDemandedBits(
// If the input is only used by this truncate, see if we can shrink it based
// on the known demanded bits.
- if (Src.getNode()->hasOneUse()) {
- switch (Src.getOpcode()) {
- default:
+ switch (Src.getOpcode()) {
+ default:
+ break;
+ case ISD::SRL:
+ // Shrink SRL by a constant if none of the high bits shifted in are
+ // demanded.
+ if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
+ // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
+ // undesirable.
break;
- case ISD::SRL:
- // Shrink SRL by a constant if none of the high bits shifted in are
- // demanded.
- if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
- // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
- // undesirable.
- break;
+ if (Src.getNode()->hasOneUse()) {
const APInt *ShAmtC =
TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
if (!ShAmtC || ShAmtC->uge(BitWidth))
@@ -2402,8 +2412,8 @@ bool TargetLowering::SimplifyDemandedBits(
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
}
- break;
}
+ break;
}
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
@@ -2420,6 +2430,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero |= ~InMask;
+ Known.One &= (~Known.Zero);
break;
}
case ISD::EXTRACT_VECTOR_ELT: {
@@ -2464,6 +2475,8 @@ bool TargetLowering::SimplifyDemandedBits(
break;
}
case ISD::BITCAST: {
+ if (VT.isScalableVector())
+ return false;
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
@@ -2576,7 +2589,7 @@ bool TargetLowering::SimplifyDemandedBits(
SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
return TLO.CombineTo(Op, And1);
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::ADD:
case ISD::SUB: {
// Add, Sub, and Mul don't demand any bits in positions beyond that
@@ -2601,6 +2614,11 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
}
+ // neg x with only low bit demanded is simply x.
+ if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
+ isa<ConstantSDNode>(Op0) && cast<ConstantSDNode>(Op0)->isZero())
+ return TLO.CombineTo(Op, Op1);
+
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
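
The "neg x with only low bit demanded" fold added in the hunk above rests on the identity (0 - x) & 1 == x & 1: two's complement negation never changes the parity bit. A minimal standalone C++ check, exhaustive over 8 bits; the program is illustrative only and not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x < 256; ++x) {
    uint8_t v = static_cast<uint8_t>(x);
    // Two's complement negation preserves the low (parity) bit.
    assert((static_cast<uint8_t>(-v) & 1) == (v & 1));
  }
  return 0;
}
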
@@ -2679,10 +2697,16 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
}
default:
- if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
+ // We also ask the target about intrinsics (which could be specific to it).
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
+ // TODO: Probably okay to remove after audit; here to reduce change size
+ // in initial enablement patch for scalable vectors
+ if (Op.getValueType().isScalableVector())
+ break;
if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
Known, TLO, Depth))
return true;
@@ -2715,6 +2739,12 @@ bool TargetLowering::SimplifyDemandedBits(
APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
}
+ // A multi use 'all demanded elts' simplify failed to find any knownbits.
+ // Try again just for the original demanded elts.
+ // Ensure we do this AFTER constant folding above.
+ if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
+ Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
+
return false;
}
@@ -2746,7 +2776,7 @@ static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
"Vector binop only");
EVT EltVT = VT.getVectorElementType();
- unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
assert(UndefOp0.getBitWidth() == NumElts &&
UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
@@ -2814,7 +2844,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
// If Op has other users, assume that all elements are needed.
- if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
+ if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
DemandedElts.setAllBits();
// Not demanding any elements from Op.
@@ -3176,6 +3206,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
break;
}
case ISD::VECTOR_SHUFFLE: {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
// Collect demanded elements from shuffle operands..
@@ -3195,17 +3227,17 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// See if we can simplify either shuffle operand.
APInt UndefLHS, ZeroLHS;
APInt UndefRHS, ZeroRHS;
- if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
- ZeroLHS, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
+ Depth + 1))
return true;
- if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
- ZeroRHS, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
+ Depth + 1))
return true;
// Simplify mask using undef elements from LHS/RHS.
bool Updated = false;
bool IdentityLHS = true, IdentityRHS = true;
- SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
+ SmallVector<int, 32> NewMask(ShuffleMask);
for (unsigned i = 0; i != NumElts; ++i) {
int &M = NewMask[i];
if (M < 0)
@@ -3223,8 +3255,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// to Identity which can cause premature removal of the shuffle mask.
if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
SDValue LegalShuffle =
- buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
- NewMask, TLO.DAG);
+ buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
if (LegalShuffle)
return TLO.CombineTo(Op, LegalShuffle);
}
@@ -3307,7 +3338,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
Depth + 1, /*AssumeSingleUse*/ true))
return true;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
}
case ISD::OR:
case ISD::XOR:
@@ -3367,6 +3398,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
break;
}
case ISD::MUL:
+ case ISD::MULHU:
+ case ISD::MULHS:
case ISD::AND: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
@@ -3375,10 +3408,16 @@ bool TargetLowering::SimplifyDemandedVectorElts(
if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
Depth + 1))
return true;
- if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
+ // If we know that a demanded element was zero in Op1 we don't need to
+    // demand it in Op0 - it's guaranteed to be zero.
+ APInt DemandedElts0 = DemandedElts & ~SrcZero;
+ if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
TLO, Depth + 1))
return true;
+ KnownUndef &= DemandedElts0;
+ KnownZero &= DemandedElts0;
+
// If every element pair has a zero/undef then just fold to zero.
// fold (and x, undef) -> 0 / (and x, 0) -> 0
// fold (mul x, undef) -> 0 / (mul x, 0) -> 0
@@ -3566,6 +3605,19 @@ bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
return false;
}
+bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
+ SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+ bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use canCreateUndefOrPoison if you don't know whether Op"
+ " is a target node!");
+ // Be conservative and return true.
+ return true;
+}
+
bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
bool SNaN,
@@ -3582,6 +3634,7 @@ bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
const APInt &DemandedElts,
APInt &UndefElts,
+ const SelectionDAG &DAG,
unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
@@ -3692,6 +3745,26 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
}
+ // Try to eliminate a power-of-2 mask constant by converting to a signbit
+ // test in a narrow type that we can truncate to with no cost. Examples:
+ // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
+ // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
+ // TODO: This conservatively checks for type legality on the source and
+ // destination types. That may inhibit optimizations, but it also
+ // allows setcc->shift transforms that may be more beneficial.
+ auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
+ isTypeLegal(OpVT) && N0.hasOneUse()) {
+ EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
+ AndC->getAPIntValue().getActiveBits());
+ if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
+ SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
+ SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
+ return DAG.getSetCC(DL, VT, Trunc, Zero,
+ Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
+ }
+ }
+
// Match these patterns in any of their permutations:
// (X & Y) == Y
// (X & Y) != Y
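
The power-of-2 mask fold added above relies on a simple equivalence: testing a single bit 1 << (K-1) of X is the same as testing the sign of X truncated to K bits. A standalone sketch of that equivalence with K = 16 chosen purely for illustration; it assumes C++20 two's-complement narrowing semantics and is not taken from the patch:

#include <cassert>
#include <cstdint>

int main() {
  // Sample the 32-bit space; the identity only involves bit 15, so sampling
  // is enough to exercise both outcomes.
  for (uint64_t i = 0; i <= 0xFFFFFFFFull; i += 0x10001ull) {
    uint32_t x = static_cast<uint32_t>(i);
    bool maskClear = (x & 0x8000u) == 0;           // (i32 X & 32768) == 0
    bool signTest = static_cast<int16_t>(x) >= 0;  // (trunc X to i16) >= 0
    assert(maskClear == signTest);
  }
  return 0;
}
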
@@ -3968,14 +4041,14 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
EVT CTVT = CTPOP.getValueType();
SDValue CTOp = CTPOP.getOperand(0);
- // If this is a vector CTPOP, keep the CTPOP if it is legal.
- // TODO: Should we check if CTPOP is legal(or custom) for scalars?
- if (VT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
- return SDValue();
-
+ // Expand a power-of-2-or-zero comparison based on ctpop:
// (ctpop x) u< 2 -> (x & x-1) == 0
// (ctpop x) u> 1 -> (x & x-1) != 0
if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
+ // Keep the CTPOP if it is a legal vector op.
+ if (CTVT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
+ return SDValue();
+
unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
return SDValue();
@@ -3994,16 +4067,14 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
}
- // If ctpop is not supported, expand a power-of-2 comparison based on it.
+ // Expand a power-of-2 comparison based on ctpop:
+ // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
+ // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
- // For scalars, keep CTPOP if it is legal or custom.
- if (!VT.isVector() && TLI.isOperationLegalOrCustom(ISD::CTPOP, CTVT))
+ // Keep the CTPOP if it is legal.
+ if (TLI.isOperationLegal(ISD::CTPOP, CTVT))
return SDValue();
- // This is based on X86's custom lowering for CTPOP which produces more
- // instructions than the expansion here.
- // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
- // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
SDValue Zero = DAG.getConstant(0, dl, CTVT);
SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
assert(CTVT.isInteger());
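
Both ctpop expansions above use the classic x & (x - 1) trick, which clears the lowest set bit: the result is zero exactly when x has at most one bit set. A standalone, exhaustive 16-bit check of the two identities (requires C++20 for std::popcount; the program is illustrative only):

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t i = 0; i <= 0xFFFF; ++i) {
    uint16_t x = static_cast<uint16_t>(i);
    unsigned pop = std::popcount(x);
    // x & (x - 1) clears the lowest set bit, so it is zero iff popcount <= 1.
    bool clears = static_cast<uint16_t>(x & (x - 1)) == 0;
    assert((pop < 2) == clears);               // (ctpop x) u< 2
    assert((pop == 1) == (x != 0 && clears));  // (ctpop x) == 1
  }
  return 0;
}
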
@@ -4137,6 +4208,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
SelectionDAG &DAG = DCI.DAG;
const DataLayout &Layout = DAG.getDataLayout();
EVT OpVT = N0.getValueType();
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
// Constant fold or commute setcc.
if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
@@ -4181,6 +4253,23 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
return V;
+ // For equality to 0 of a no-wrap multiply, decompose and test each op:
+ // X * Y == 0 --> (X == 0) || (Y == 0)
+ // X * Y != 0 --> (X != 0) && (Y != 0)
+ // TODO: This bails out if minsize is set, but if the target doesn't have a
+ // single instruction multiply for this type, it would likely be
+ // smaller to decompose.
+ if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
+ (N0->getFlags().hasNoUnsignedWrap() ||
+ N0->getFlags().hasNoSignedWrap()) &&
+ !Attr.hasFnAttr(Attribute::MinSize)) {
+ SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
+ SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
+ unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
+ return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
+ }
+
// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
// equality comparison, then we're just comparing whether X itself is
// zero.
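
The decomposition of X * Y == 0 added above is only sound because the no-wrap flags rule out a product that wraps to zero. A small standalone sketch showing the wrapping counterexample and the identity when the product cannot overflow; the 8-bit width is chosen only for illustration:

#include <cassert>
#include <cstdint>

int main() {
  // With wrapping 8-bit arithmetic the fold would be wrong:
  uint8_t a = 16, b = 16;
  assert(static_cast<uint8_t>(a * b) == 0); // product wraps to zero...
  assert(a != 0 && b != 0);                 // ...though neither factor is zero.

  // When the multiply cannot wrap (modelled here by computing it in a wider
  // type), X * Y == 0 really is equivalent to X == 0 || Y == 0.
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y)
      assert(((x * y) == 0) == (x == 0 || y == 0));
  return 0;
}
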
@@ -4970,8 +5059,6 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Fold remainder of division by a constant.
if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
- AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
-
// When division is cheap or optimizing for minimum size,
// fall through to DIVREM creation by skipping this fold.
if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
@@ -5221,6 +5308,12 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
}
+void TargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
+ SmallVectorImpl<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ return;
+}
+
std::pair<unsigned, const TargetRegisterClass *>
TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
StringRef Constraint,
@@ -5334,11 +5427,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
break;
case InlineAsm::isLabel:
- OpInfo.CallOperandVal =
- cast<CallBrInst>(&Call)->getBlockAddressForIndirectDest(LabelNo);
- OpInfo.ConstraintVT =
- getAsmOperandValueType(DL, OpInfo.CallOperandVal->getType())
- .getSimpleVT();
+ OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
++LabelNo;
continue;
case InlineAsm::isClobber:
@@ -5944,54 +6033,68 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
- bool UseNPQ = false;
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Try to use leading zeros of the dividend to reduce the multiplier and
+ // avoid expensive fixups.
+ // TODO: Support vectors.
+ unsigned LeadingZeros = 0;
+ if (!VT.isVector() && isa<ConstantSDNode>(N1)) {
+ assert(!isOneConstant(N1) && "Unexpected divisor");
+ LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
+ // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros in
+    // the dividend exceed the leading zeros for the divisor.
+ LeadingZeros =
+ std::min(LeadingZeros,
+ cast<ConstantSDNode>(N1)->getAPIntValue().countLeadingZeros());
+ }
+
+ bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
if (C->isZero())
return false;
- // FIXME: We should use a narrower constant when the upper
- // bits are known to be zero.
const APInt& Divisor = C->getAPIntValue();
- UnsignedDivisionByConstantInfo magics =
- UnsignedDivisionByConstantInfo::get(Divisor);
- unsigned PreShift = 0, PostShift = 0;
-
- // If the divisor is even, we can avoid using the expensive fixup by
- // shifting the divided value upfront.
- if (magics.IsAdd && !Divisor[0]) {
- PreShift = Divisor.countTrailingZeros();
- // Get magic number for the shifted divisor.
- magics =
- UnsignedDivisionByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
- assert(!magics.IsAdd && "Should use cheap fixup now");
- }
-
- unsigned SelNPQ;
- if (!magics.IsAdd || Divisor.isOne()) {
- assert(magics.ShiftAmount < Divisor.getBitWidth() &&
- "We shouldn't generate an undefined shift!");
- PostShift = magics.ShiftAmount;
- SelNPQ = false;
+
+ SDValue PreShift, MagicFactor, NPQFactor, PostShift;
+
+ // Magic algorithm doesn't work for division by 1. We need to emit a select
+ // at the end.
+ if (Divisor.isOne()) {
+ PreShift = PostShift = DAG.getUNDEF(ShSVT);
+ MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
} else {
- PostShift = magics.ShiftAmount - 1;
- SelNPQ = true;
- }
+ UnsignedDivisionByConstantInfo magics =
+ UnsignedDivisionByConstantInfo::get(Divisor, LeadingZeros);
- PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
- MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
- NPQFactors.push_back(
- DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
- : APInt::getZero(EltBits),
- dl, SVT));
- PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
- UseNPQ |= SelNPQ;
+ MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
+
+ assert(magics.PreShift < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ assert(magics.PostShift < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ assert((!magics.IsAdd || magics.PreShift == 0) &&
+ "Unexpected pre-shift");
+ PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
+ PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
+ NPQFactor = DAG.getConstant(
+ magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
+ : APInt::getZero(EltBits),
+ dl, SVT);
+ UseNPQ |= magics.IsAdd;
+ UsePreShift |= magics.PreShift != 0;
+ UsePostShift |= magics.PostShift != 0;
+ }
+
+ PreShifts.push_back(PreShift);
+ MagicFactors.push_back(MagicFactor);
+ NPQFactors.push_back(NPQFactor);
+ PostShifts.push_back(PostShift);
return true;
};
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
-
// Collect the shifts/magic values from each element.
if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
return SDValue();
@@ -6018,8 +6121,10 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
}
SDValue Q = N0;
- Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
- Created.push_back(Q.getNode());
+ if (UsePreShift) {
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
+ Created.push_back(Q.getNode());
+ }
// FIXME: We should support doing a MUL in a wider type.
auto GetMULHU = [&](SDValue X, SDValue Y) {
@@ -6068,8 +6173,10 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
Created.push_back(Q.getNode());
}
- Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
- Created.push_back(Q.getNode());
+ if (UsePostShift) {
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
+ Created.push_back(Q.getNode());
+ }
EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
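
The BuildUDIV changes above still implement the classic transformation: an unsigned divide by a constant becomes a multiply by a precomputed "magic" value followed by a shift, with the pre/post shifts and the NPQ path acting as fixups when the magic must fit in the element width. A standalone sketch of the underlying idea, using an oversized multiply for 8-bit dividends so that no fixup is needed; this simplification is mine and is not how UnsignedDivisionByConstantInfo packs its constants:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t d = 2; d <= 255; ++d) {
    // Smallest s with (1 << s) >= d, i.e. ceil(log2(d)).
    unsigned s = 0;
    while ((1u << s) < d)
      ++s;
    // magic = ceil(2^(8+s) / d); it needs at most 9 bits, so a 32-bit multiply
    // is comfortably wide enough for an 8-bit dividend.
    uint32_t magic = ((1u << (8 + s)) + d - 1) / d;
    for (uint32_t x = 0; x <= 255; ++x)
      assert(((x * magic) >> (8 + s)) == x / d);
  }
  return 0;
}

The leading-zeros refinement added in the earlier hunk reduces the effective bit width of the dividend, which lets UnsignedDivisionByConstantInfo pick a smaller magic and avoid the NPQ fixup more often.
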
@@ -6921,6 +7028,41 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
OptForSize, Cost, Depth))
return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
break;
+ case ISD::SELECT:
+ case ISD::VSELECT: {
+ // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
+ // iff at least one cost is cheaper and the other is neutral/cheaper
+ SDValue LHS = Op.getOperand(1);
+ NegatibleCost CostLHS = NegatibleCost::Expensive;
+ SDValue NegLHS =
+ getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
+ if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
+ RemoveDeadNode(NegLHS);
+ break;
+ }
+
+ // Prevent this node from being deleted by the next call.
+ Handles.emplace_back(NegLHS);
+
+ SDValue RHS = Op.getOperand(2);
+ NegatibleCost CostRHS = NegatibleCost::Expensive;
+ SDValue NegRHS =
+ getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
+
+ // We're done with the handles.
+ Handles.clear();
+
+ if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
+ (CostLHS != NegatibleCost::Cheaper &&
+ CostRHS != NegatibleCost::Cheaper)) {
+ RemoveDeadNode(NegLHS);
+ RemoveDeadNode(NegRHS);
+ break;
+ }
+
+ Cost = std::min(CostLHS, CostRHS);
+ return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
+ }
}
return SDValue();
@@ -7002,8 +7144,8 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
}
if (!VT.isVector() && Opcode == ISD::MUL &&
- DAG.ComputeNumSignBits(LHS) > InnerBitSize &&
- DAG.ComputeNumSignBits(RHS) > InnerBitSize) {
+ DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
+ DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
// The input values are both sign-extended.
// TODO non-MUL case?
if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
@@ -7014,8 +7156,7 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
}
unsigned ShiftAmount = OuterBitSize - InnerBitSize;
- EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
- SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);
+ SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
if (!LH.getNode() && !RH.getNode() &&
isOperationLegalOrCustom(ISD::SRL, VT) &&
@@ -7122,6 +7263,190 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
return Ok;
}
+// Optimize unsigned division or remainder by constants for types twice as large
+// as a legal VT.
+//
+// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder can be
+// computed as:
+// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
+// Remainder = Sum % Constant
+// This is based on "Remainder by Summing Digits" from Hacker's Delight.
+//
+// For division, we can compute the remainder using the algorithm described
+// above, subtract it from the dividend to get an exact multiple of Constant,
+// and then multiply that exact multiple by the multiplicative inverse of the
+// divisor modulo (1 << BitWidth) to get the quotient.
+
+// If Constant is even, we can shift the dividend and the divisor right by the
+// number of trailing zeros in Constant before applying the remainder algorithm.
+// If we're after the quotient, we subtract that remainder from the shifted
+// dividend and multiply by the multiplicative inverse of the shifted divisor.
+// If we want the remainder, we shift that remainder left by the number of
+// trailing zeros and add back the bits that were shifted out of the dividend.
+bool TargetLowering::expandDIVREMByConstant(SDNode *N,
+ SmallVectorImpl<SDValue> &Result,
+ EVT HiLoVT, SelectionDAG &DAG,
+ SDValue LL, SDValue LH) const {
+ unsigned Opcode = N->getOpcode();
+ EVT VT = N->getValueType(0);
+
+ // TODO: Support signed division/remainder.
+ if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
+ return false;
+ assert(
+ (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
+ "Unexpected opcode");
+
+ auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!CN)
+ return false;
+
+ APInt Divisor = CN->getAPIntValue();
+ unsigned BitWidth = Divisor.getBitWidth();
+ unsigned HBitWidth = BitWidth / 2;
+ assert(VT.getScalarSizeInBits() == BitWidth &&
+ HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
+
+ // Divisor needs to be less than (1 << HBitWidth).
+ APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
+ if (Divisor.uge(HalfMaxPlus1))
+ return false;
+
+ // We depend on the UREM-by-constant optimization in DAGCombiner, which
+ // requires a high multiply.
+ if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
+ !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
+ return false;
+
+ // Don't expand if optimizing for size.
+ if (DAG.shouldOptForSize())
+ return false;
+
+ // Early out for 0 or 1 divisors.
+ if (Divisor.ule(1))
+ return false;
+
+ // If the divisor is even, shift it until it becomes odd.
+ unsigned TrailingZeros = 0;
+ if (!Divisor[0]) {
+ TrailingZeros = Divisor.countTrailingZeros();
+ Divisor.lshrInPlace(TrailingZeros);
+ }
+
+ SDLoc dl(N);
+ SDValue Sum;
+ SDValue PartialRem;
+
+ // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
+ // then add in the carry.
+ // TODO: If we can't split it in half, we might be able to split into 3 or
+ // more pieces using a smaller bit width.
+ if (HalfMaxPlus1.urem(Divisor).isOneValue()) {
+ assert(!LL == !LH && "Expected both input halves or no input halves!");
+ if (!LL) {
+ LL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, N->getOperand(0),
+ DAG.getIntPtrConstant(0, dl));
+ LH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, N->getOperand(0),
+ DAG.getIntPtrConstant(1, dl));
+ }
+
+ // Shift the input by the number of TrailingZeros in the divisor. The
+ // shifted out bits will be added to the remainder later.
+ if (TrailingZeros) {
+ // Save the shifted off bits if we need the remainder.
+ if (Opcode != ISD::UDIV) {
+ APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
+ PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
+ DAG.getConstant(Mask, dl, HiLoVT));
+ }
+
+ LL = DAG.getNode(
+ ISD::OR, dl, HiLoVT,
+ DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
+ DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
+ DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
+ DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
+ HiLoVT, dl)));
+ LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
+ DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
+ }
+
+ // Use addcarry if we can, otherwise use a compare to detect overflow.
+ EVT SetCCType =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
+ if (isOperationLegalOrCustom(ISD::ADDCARRY, HiLoVT)) {
+ SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
+ Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
+ Sum = DAG.getNode(ISD::ADDCARRY, dl, VTList, Sum,
+ DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
+ } else {
+ Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
+ SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
+ // If the boolean for the target is 0 or 1, we can add the setcc result
+ // directly.
+ if (getBooleanContents(HiLoVT) ==
+ TargetLoweringBase::ZeroOrOneBooleanContent)
+ Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
+ else
+ Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
+ DAG.getConstant(0, dl, HiLoVT));
+ Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
+ }
+ }
+
+ // If we didn't find a sum, we can't do the expansion.
+ if (!Sum)
+ return false;
+
+ // Perform a HiLoVT urem on the Sum using the truncated divisor.
+ SDValue RemL =
+ DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
+ DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
+ SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
+
+ if (Opcode != ISD::UREM) {
+ // Subtract the remainder from the shifted dividend.
+ SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
+ SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
+
+ Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
+
+ // Multiply by the multiplicative inverse of the divisor modulo
+ // (1 << BitWidth).
+ APInt Mod = APInt::getSignedMinValue(BitWidth + 1);
+ APInt MulFactor = Divisor.zext(BitWidth + 1);
+ MulFactor = MulFactor.multiplicativeInverse(Mod);
+ MulFactor = MulFactor.trunc(BitWidth);
+
+ SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
+ DAG.getConstant(MulFactor, dl, VT));
+
+ // Split the quotient into low and high parts.
+ SDValue QuotL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
+ DAG.getIntPtrConstant(0, dl));
+ SDValue QuotH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
+ DAG.getIntPtrConstant(1, dl));
+ Result.push_back(QuotL);
+ Result.push_back(QuotH);
+ }
+
+ if (Opcode != ISD::UDIV) {
+ // If we shifted the input, shift the remainder left and add the bits we
+ // shifted off the input.
+ if (TrailingZeros) {
+ APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
+ RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
+ DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
+ RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
+ }
+ Result.push_back(RemL);
+ Result.push_back(DAG.getConstant(0, dl, HiLoVT));
+ }
+
+ return true;
+}
+
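For reference, a minimal scalar sketch (not part of this patch) of what expandDIVREMByConstant emits for a 64-bit dividend split into 32-bit halves, with Divisor = 10; the helper name and the hard-coded constants are illustrative only.

#include <cstdint>

static void udivrem64By10(uint64_t X, uint64_t &Quot, uint64_t &Rem) {
  const uint32_t OddDivisor = 5;    // 10 with its trailing zero stripped
  const unsigned TrailingZeros = 1; // 10 is even: one trailing zero
  uint32_t Lo = uint32_t(X), Hi = uint32_t(X >> 32);

  // Save the bit shifted out of the dividend, then shift the whole 64-bit
  // value right by one across the two halves.
  uint32_t PartialRem = Lo & 1;
  Lo = (Lo >> 1) | (Hi << 31);
  Hi >>= 1;

  // (1 << 32) % 5 == 1, so Lo + Hi plus the carry is congruent to the
  // shifted dividend modulo 5 ("Remainder by Summing Digits").
  uint32_t Sum = Lo + Hi;
  Sum += (Sum < Lo); // add the carry; this cannot overflow again
  uint32_t RemL = Sum % OddDivisor;

  // Remainder of the original division: shift back and add the saved bit.
  Rem = (uint64_t(RemL) << TrailingZeros) + PartialRem;

  // Quotient: subtract the remainder from the shifted dividend to get an
  // exact multiple of 5, then multiply by the inverse of 5 modulo 2^64
  // (5 * 0xCCCCCCCCCCCCCCCD == 1 mod 2^64).
  Quot = ((X >> TrailingZeros) - RemL) * 0xCCCCCCCCCCCCCCCDULL;
}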
// Check that (every element of) Z is undef or not an exact multiple of BW.
static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
return ISD::matchUnaryPredicate(
@@ -7130,8 +7455,68 @@ static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
true);
}
+static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
+ EVT VT = Node->getValueType(0);
+ SDValue ShX, ShY;
+ SDValue ShAmt, InvShAmt;
+ SDValue X = Node->getOperand(0);
+ SDValue Y = Node->getOperand(1);
+ SDValue Z = Node->getOperand(2);
+ SDValue Mask = Node->getOperand(3);
+ SDValue VL = Node->getOperand(4);
+
+ unsigned BW = VT.getScalarSizeInBits();
+ bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
+ SDLoc DL(SDValue(Node, 0));
+
+ EVT ShVT = Z.getValueType();
+ if (isNonZeroModBitWidthOrUndef(Z, BW)) {
+ // fshl: X << C | Y >> (BW - C)
+ // fshr: X << (BW - C) | Y >> C
+ // where C = Z % BW is not zero
+ SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
+ ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
+ InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
+ ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
+ VL);
+ ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
+ VL);
+ } else {
+ // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
+ // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
+ SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
+ if (isPowerOf2_32(BW)) {
+ // Z % BW -> Z & (BW - 1)
+ ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
+ // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
+ SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
+ DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
+ InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
+ } else {
+ SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
+ ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
+ InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
+ }
+
+ SDValue One = DAG.getConstant(1, DL, ShVT);
+ if (IsFSHL) {
+ ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
+ SDValue ShY1 = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, One, Mask, VL);
+ ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, ShY1, InvShAmt, Mask, VL);
+ } else {
+ SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
+ ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
+ ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, ShAmt, Mask, VL);
+ }
+ }
+ return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
+}
+
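A scalar rendering (illustrative only, not from the patch) of the general form used above when Z % BW may be zero, for a 32-bit fshl:

#include <cstdint>

static uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  const unsigned BW = 32;
  unsigned ShAmt = Z & (BW - 1);     // Z % BW, since BW is a power of two
  unsigned InvShAmt = ~Z & (BW - 1); // (BW - 1) - (Z % BW)
  // X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW)): the extra ">> 1" keeps
  // every shift amount strictly below BW, even when Z % BW == 0.
  return (X << ShAmt) | ((Y >> 1) >> InvShAmt);
}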
SDValue TargetLowering::expandFunnelShift(SDNode *Node,
SelectionDAG &DAG) const {
+ if (Node->isVPOpcode())
+ return expandVPFunnelShift(Node, DAG);
+
EVT VT = Node->getValueType(0);
if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
@@ -7919,6 +8304,63 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
DAG.getConstant(Len - 8, dl, ShVT));
}
+SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = Node->getOperand(0);
+ SDValue Mask = Node->getOperand(1);
+ SDValue VL = Node->getOperand(2);
+ unsigned Len = VT.getScalarSizeInBits();
+ assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
+
+ // TODO: Add support for irregular type lengths.
+ if (!(Len <= 128 && Len % 8 == 0))
+ return SDValue();
+
+ // This is the same algorithm as expandCTPOP, from
+ // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+ SDValue Mask55 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
+ SDValue Mask33 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
+ SDValue Mask0F =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
+
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
+
+ // v = v - ((v >> 1) & 0x55555555...)
+ Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
+ DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
+ DAG.getConstant(1, dl, ShVT), Mask, VL),
+ Mask55, Mask, VL);
+ Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
+
+ // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+ Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
+ Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
+ DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
+ DAG.getConstant(2, dl, ShVT), Mask, VL),
+ Mask33, Mask, VL);
+ Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
+
+ // v = (v + (v >> 4)) & 0x0F0F0F0F...
+ Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
+ Mask, VL);
+ Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
+ Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
+
+ if (Len <= 8)
+ return Op;
+
+ // v = (v * 0x01010101...) >> (Len - 8)
+ SDValue Mask01 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+ return DAG.getNode(ISD::VP_LSHR, dl, VT,
+ DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL),
+ DAG.getConstant(Len - 8, dl, ShVT), Mask, VL);
+}
+
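The VP sequence above mirrors the classic SWAR popcount; as a cross-check, the 32-bit scalar form (illustrative, not part of the patch):

#include <cstdint>

static uint32_t popcount32(uint32_t V) {
  V = V - ((V >> 1) & 0x55555555u);                 // count bits in pairs
  V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u); // sums per nibble
  V = (V + (V >> 4)) & 0x0F0F0F0Fu;                 // sums per byte
  return (V * 0x01010101u) >> 24;                   // add the byte sums
}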
SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
@@ -7969,6 +8411,77 @@ SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
+SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = Node->getOperand(0);
+ SDValue Mask = Node->getOperand(1);
+ SDValue VL = Node->getOperand(2);
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+
+ // do this:
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >> 16);
+ // x = x | (x >> 32); // for 64-bit input
+ // return popcount(~x);
+ for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
+ SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
+ Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
+ DAG.getNode(ISD::VP_LSHR, dl, VT, Op, Tmp, Mask, VL), Mask,
+ VL);
+ }
+ Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getConstant(-1, dl, VT), Mask,
+ VL);
+ return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
+}
+
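The same expansion in scalar form, shown only to make the smear-then-popcount idea explicit (the builtin stands in for the VP_CTPOP the code emits):

#include <cstdint>

static unsigned ctlz32(uint32_t X) {
  X |= X >> 1;
  X |= X >> 2;
  X |= X >> 4;
  X |= X >> 8;
  X |= X >> 16; // every bit below the highest set bit is now 1
  return __builtin_popcount(~X); // 32 for X == 0, matching ISD::CTLZ
}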
+SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
+ const SDLoc &DL, EVT VT, SDValue Op,
+ unsigned BitWidth) const {
+ if (BitWidth != 32 && BitWidth != 64)
+ return SDValue();
+ APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
+ : APInt(64, 0x0218A392CD3D5DBFULL);
+ const DataLayout &TD = DAG.getDataLayout();
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
+ unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
+ SDValue Lookup = DAG.getNode(
+ ISD::SRL, DL, VT,
+ DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
+ DAG.getConstant(DeBruijn, DL, VT)),
+ DAG.getConstant(ShiftAmt, DL, VT));
+ Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));
+
+ SmallVector<uint8_t> Table(BitWidth, 0);
+ for (unsigned i = 0; i < BitWidth; i++) {
+ APInt Shl = DeBruijn.shl(i);
+ APInt Lshr = Shl.lshr(ShiftAmt);
+ Table[Lshr.getZExtValue()] = i;
+ }
+
+ // Create a ConstantDataArray in the constant pool.
+ auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
+ SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
+ TD.getPrefTypeAlign(CA->getType()));
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
+ DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
+ PtrInfo, MVT::i8);
+ if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
+ return ExtLoad;
+
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
+ return DAG.getSelect(DL, VT, SrcIsZero,
+ DAG.getConstant(BitWidth, DL, VT), ExtLoad);
+}
+
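A scalar model of CTTZTableLookup for 32 bits (illustrative; the table below matches what the function stores in the constant pool for the 0x077CB531 de Bruijn constant). The zero-input case is handled by the select emitted after the load, so this helper assumes X != 0, as CTTZ_ZERO_UNDEF does.

#include <cstdint>

static unsigned cttz32_debruijn(uint32_t X) {
  static const uint8_t Table[32] = {
      0,  1,  28, 2,  29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4,  8,
      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6,  11, 5,  10, 9};
  uint32_t LowBit = X & (0u - X);             // isolate the lowest set bit
  return Table[(LowBit * 0x077CB531u) >> 27]; // 27 == 32 - Log2_32(32)
}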
SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
@@ -8002,6 +8515,12 @@ SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
return SDValue();
+ // Emit a table lookup if ISD::CTPOP will be expanded and ISD::CTLZ is not legal.
+ if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
+ !isOperationLegal(ISD::CTLZ, VT))
+ if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
+ return V;
+
// for now, we use: { return popcount(~x & (x - 1)); }
// unless the target has ctlz but not ctpop, in which case we use:
// { return 32 - nlz(~x & (x-1)); }
@@ -8019,6 +8538,22 @@ SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
}
+SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
+ SDValue Op = Node->getOperand(0);
+ SDValue Mask = Node->getOperand(1);
+ SDValue VL = Node->getOperand(2);
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+
+ // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
+ SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
+ DAG.getConstant(-1, dl, VT), Mask, VL);
+ SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
+ DAG.getConstant(1, dl, VT), Mask, VL);
+ SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
+ return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
+}
+
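The table-free path above rests on one identity; a short scalar check (illustrative only, with the builtin standing in for the emitted VP_CTPOP):

#include <cstdint>

static unsigned cttz32_generic(uint32_t X) {
  // The bits below the lowest set bit of X are exactly ~X & (X - 1);
  // e.g. X = 0b101000 gives ~X & (X - 1) = 0b000111, count = 3. For X == 0
  // the mask is all ones, giving the bit width, as ISD::CTTZ requires.
  return __builtin_popcount(~X & (X - 1));
}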
SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
bool IsNegative) const {
SDLoc dl(N);
@@ -8092,36 +8627,36 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
case MVT::i32:
Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
- Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
+ DAG.getConstant(0xFF00, dl, VT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
- Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
- DAG.getConstant(0xFF0000, dl, VT));
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
case MVT::i64:
Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
- Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
- Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
- Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
+ DAG.getConstant(255ULL<<8, dl, VT));
+ Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
+ Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
+ DAG.getConstant(255ULL<<16, dl, VT));
+ Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
+ Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
+ DAG.getConstant(255ULL<<24, dl, VT));
+ Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
- Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
- Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
- Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7,
- DAG.getConstant(255ULL<<48, dl, VT));
- Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6,
- DAG.getConstant(255ULL<<40, dl, VT));
- Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5,
- DAG.getConstant(255ULL<<32, dl, VT));
Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
DAG.getConstant(255ULL<<24, dl, VT));
+ Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
DAG.getConstant(255ULL<<16, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
- DAG.getConstant(255ULL<<8 , dl, VT));
+ DAG.getConstant(255ULL<<8, dl, VT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
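The reordering in this hunk masks the source before shifting, so the AND constants stay small and shared. In scalar form the rewritten i32 case reads (illustrative only):

#include <cstdint>

static uint32_t bswap32(uint32_t Op) {
  uint32_t Tmp4 = Op << 24;             // byte 0 -> byte 3
  uint32_t Tmp3 = (Op & 0xFF00u) << 8;  // byte 1 -> byte 2
  uint32_t Tmp2 = (Op >> 8) & 0xFF00u;  // byte 2 -> byte 1
  uint32_t Tmp1 = Op >> 24;             // byte 3 -> byte 0
  return (Tmp4 | Tmp3) | (Tmp2 | Tmp1); // both masks are now 0xFF00
}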
@@ -8132,6 +8667,82 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
}
}
+SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+
+ if (!VT.isSimple())
+ return SDValue();
+
+ EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+ switch (VT.getSimpleVT().getScalarType().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::i16:
+ Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
+ Mask, EVL);
+ return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
+ case MVT::i32:
+ Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
+ Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
+ Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
+ DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
+ Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
+ Mask, EVL);
+ Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
+ return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
+ case MVT::i64:
+ Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
+ Mask, EVL);
+ Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
+ DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
+ Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
+ Mask, EVL);
+ Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
+ DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
+ Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
+ Mask, EVL);
+ Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
+ DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
+ Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
+ Mask, EVL);
+ Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
+ Mask, EVL);
+ Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
+ DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
+ Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
+ DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
+ DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
+ Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
+ Mask, EVL);
+ Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
+ Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
+ Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
+ Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
+ Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
+ return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
+ }
+}
+
SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
@@ -8194,6 +8805,68 @@ SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
return Tmp;
}
+SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
+ assert(N->getOpcode() == ISD::VP_BITREVERSE);
+
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+ EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ unsigned Sz = VT.getScalarSizeInBits();
+
+ SDValue Tmp, Tmp2, Tmp3;
+
+ // If we can, perform BSWAP first, then mask+swap the i4 nibbles, the i2
+ // pairs, and finally the individual bits.
+ // TODO: We could easily support i4/i2 if any target ever made them legal.
+ if (Sz >= 8 && isPowerOf2_32(Sz)) {
+ // Create the masks - repeating the pattern every byte.
+ APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
+ APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
+ APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
+
+ // BSWAP if the type is wider than a single byte.
+ Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
+
+ // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
+ Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
+ DAG.getConstant(Mask4, dl, VT), Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
+ Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
+ Mask, EVL);
+ Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
+
+ // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
+ Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
+ DAG.getConstant(Mask2, dl, VT), Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
+ Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
+ Mask, EVL);
+ Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
+
+ // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
+ Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
+ DAG.getConstant(Mask1, dl, VT), Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
+ Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
+ Mask, EVL);
+ Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
+ return Tmp;
+ }
+ return SDValue();
+}
+
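A scalar sketch of the mask+swap cascade above for a 32-bit value (illustrative; __builtin_bswap32 stands in for the VP_BSWAP step):

#include <cstdint>

static uint32_t bitreverse32(uint32_t V) {
  V = __builtin_bswap32(V);                                // reverse bytes
  V = ((V >> 4) & 0x0F0F0F0Fu) | ((V & 0x0F0F0F0Fu) << 4); // swap nibbles
  V = ((V >> 2) & 0x33333333u) | ((V & 0x33333333u) << 2); // swap bit pairs
  V = ((V >> 1) & 0x55555555u) | ((V & 0x55555555u) << 1); // swap single bits
  return V;
}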
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SelectionDAG &DAG) const {
@@ -8671,7 +9344,7 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
} else if (DataVT.isScalableVector()) {
Increment = DAG.getVScale(DL, AddrVT,
APInt(AddrVT.getFixedSizeInBits(),
- DataVT.getStoreSize().getKnownMinSize()));
+ DataVT.getStoreSize().getKnownMinValue()));
} else
Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
@@ -8957,9 +9630,13 @@ SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
assert(VT == RHS.getValueType() && "Expected operands to be the same type");
assert(VT.isInteger() && "Expected operands to be integers");
+ if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return DAG.UnrollVectorOp(Node);
+
// If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
unsigned BW = VT.getScalarSizeInBits();
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
SDValue Orig =
DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
@@ -8968,14 +9645,14 @@ SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
if (IsSigned) {
SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
- SatVal = DAG.getSelectCC(dl, LHS, DAG.getConstant(0, dl, VT),
- SatMin, SatMax, ISD::SETLT);
+ SDValue Cond =
+ DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
+ SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
} else {
SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
}
- Result = DAG.getSelectCC(dl, LHS, Orig, SatVal, Result, ISD::SETNE);
-
- return Result;
+ SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
+ return DAG.getSelect(dl, VT, Cond, SatVal, Result);
}
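A scalar model of the saturation logic this hunk rewrites, for signed 32-bit SHL_SAT (illustrative; assumes RHS < 32 as the DAG node does):

#include <cstdint>
#include <limits>

static int32_t sshl_sat32(int32_t LHS, unsigned RHS) {
  int32_t Result = (int32_t)((uint32_t)LHS << RHS); // shift in unsigned space
  int32_t Orig = Result >> RHS;                     // arithmetic shift back
  if (LHS == Orig)                                  // no overflow: keep shift
    return Result;
  return LHS < 0 ? std::numeric_limits<int32_t>::min()  // saturate by sign
                 : std::numeric_limits<int32_t>::max();
}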
SDValue
@@ -9665,7 +10342,7 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node,
// Store the hi part of CONCAT_VECTORS(V1, V2)
SDValue OffsetToV2 = DAG.getVScale(
DL, PtrVT,
- APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));
+ APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
@@ -9686,9 +10363,10 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node,
DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
if (TrailingElts > VT.getVectorMinNumElements()) {
- SDValue VLBytes = DAG.getVScale(
- DL, PtrVT,
- APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));
+ SDValue VLBytes =
+ DAG.getVScale(DL, PtrVT,
+ APInt(PtrVT.getFixedSizeInBits(),
+ VT.getStoreSize().getKnownMinValue()));
TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
}
@@ -9757,7 +10435,7 @@ bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
"If SETUE is expanded, SETOEQ or SETUNE must be legal!");
NeedInvert = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETO:
assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
"If SETO is expanded, SETOEQ must be legal!");
@@ -9781,7 +10459,7 @@ bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
NeedInvert = ((unsigned)CCCode & 0x8U);
break;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETOEQ:
case ISD::SETOGT:
case ISD::SETOGE:
@@ -9802,7 +10480,7 @@ bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
break;
}
// Fallthrough if we are unsigned integer.
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETLE:
case ISD::SETGT:
case ISD::SETGE: