path: root/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 906
1 file changed, 645 insertions(+), 261 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e8950b58d42d..e5bc08b9280a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -131,6 +131,7 @@ namespace {
const TargetLowering &TLI;
CombineLevel Level;
CodeGenOpt::Level OptLevel;
+ bool LegalDAG = false;
bool LegalOperations = false;
bool LegalTypes = false;
bool ForCodeSize;
@@ -179,6 +180,12 @@ namespace {
AddToWorklist(Node);
}
+ /// Convenient shorthand to add a node and all of its users to the worklist.
+ void AddToWorklistWithUsers(SDNode *N) {
+ AddUsersToWorklist(N);
+ AddToWorklist(N);
+ }
+
// Prune potentially dangling nodes. This is called after
// any visit to a node, but should also be called during a visit after any
// failed combine which may have created a DAG node.
@@ -217,14 +224,16 @@ namespace {
DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), AA(AA) {
- ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
+ ForCodeSize = DAG.shouldOptForSize();
MaximumLegalStoreInBits = 0;
+ // We use the minimum store size here, since that's all we can guarantee
+ // for the scalable vector types.
for (MVT VT : MVT::all_valuetypes())
if (EVT(VT).isSimple() && VT != MVT::Other &&
TLI.isTypeLegal(EVT(VT)) &&
- VT.getSizeInBits() >= MaximumLegalStoreInBits)
- MaximumLegalStoreInBits = VT.getSizeInBits();
+ VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
+ MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
}
void ConsiderForPruning(SDNode *N) {
@@ -622,7 +631,7 @@ namespace {
ConstantSDNode *Mask, SDNode *&NodeToMask);
/// Attempt to propagate a given AND node back to load leaves so that they
/// can be combined into narrow loads.
- bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
+ bool BackwardsPropagateMask(SDNode *N);
/// Helper function for MergeConsecutiveStores which merges the
/// component store chains.
@@ -1026,8 +1035,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
// Push the new node and any (possibly new) users onto the worklist.
- AddToWorklist(TLO.New.getNode());
- AddUsersToWorklist(TLO.New.getNode());
+ AddToWorklistWithUsers(TLO.New.getNode());
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
@@ -1393,6 +1401,7 @@ bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
void DAGCombiner::Run(CombineLevel AtLevel) {
// Set the instance variables, so that the various visit routines may use them.
Level = AtLevel;
+ LegalDAG = Level >= AfterLegalizeDAG;
LegalOperations = Level >= AfterLegalizeVectorOps;
LegalTypes = Level >= AfterLegalizeTypes;
@@ -1419,14 +1428,13 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
// If this combine is running after legalizing the DAG, re-legalize any
// nodes pulled off the worklist.
- if (Level == AfterLegalizeDAG) {
+ if (LegalDAG) {
SmallSetVector<SDNode *, 16> UpdatedNodes;
bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
- for (SDNode *LN : UpdatedNodes) {
- AddUsersToWorklist(LN);
- AddToWorklist(LN);
- }
+ for (SDNode *LN : UpdatedNodes)
+ AddToWorklistWithUsers(LN);
+
if (!NIsValid)
continue;
}
@@ -2800,6 +2808,96 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
return SDValue();
}
+// If we are facing some sort of diamond carry/borrow in/out pattern, try to
+// match patterns like:
+//
+// (uaddo A, B) CarryIn
+// | \ |
+// | \ |
+// PartialSum PartialCarryOutX /
+// | | /
+// | ____|____________/
+// | / |
+// (uaddo *, *) \________
+// | \ \
+// | \ |
+// | PartialCarryOutY |
+// | \ |
+// | \ /
+// AddCarrySum | ______/
+// | /
+// CarryOut = (or *, *)
+//
+// And generate ADDCARRY (or SUBCARRY) with two result values:
+//
+// {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
+//
+// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
+// a single path for carry/borrow out propagation:
+static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
+ const TargetLowering &TLI, SDValue Carry0,
+ SDValue Carry1, SDNode *N) {
+ if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
+ return SDValue();
+ unsigned Opcode = Carry0.getOpcode();
+ if (Opcode != Carry1.getOpcode())
+ return SDValue();
+ if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
+ return SDValue();
+
+ // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
+ // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
+ // the above ASCII art.)
+ if (Carry1.getOperand(0) != Carry0.getValue(0) &&
+ Carry1.getOperand(1) != Carry0.getValue(0))
+ std::swap(Carry0, Carry1);
+ if (Carry1.getOperand(0) != Carry0.getValue(0) &&
+ Carry1.getOperand(1) != Carry0.getValue(0))
+ return SDValue();
+
+ // The carry in value must be on the right-hand side for subtraction.
+ unsigned CarryInOperandNum =
+ Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
+ if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
+ return SDValue();
+ SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
+
+ unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
+ if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
+ return SDValue();
+
+ // Verify that the carry/borrow in is plausibly a carry/borrow bit.
+ // TODO: make getAsCarry() aware of how partial carries are merged.
+ if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+ CarryIn = CarryIn.getOperand(0);
+ if (CarryIn.getValueType() != MVT::i1)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue Merged =
+ DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
+ Carry0.getOperand(1), CarryIn);
+
+ // Because we have proven that the result of the UADDO/USUBO of A and B
+ // feeds into the UADDO/USUBO that does the carry/borrow in, we know that if
+ // the first UADDO/USUBO overflows, the second UADDO/USUBO cannot. For
+ // example, consider 8-bit numbers where 0xFF is the maximum value.
+ //
+ // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
+ // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
+ //
+ // This is important because it means that OR and XOR can be used to merge
+ // carry flags; and that AND can return a constant zero.
+ //
+ // TODO: match other operations that can merge flags (ADD, etc)
+ DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
+ if (N->getOpcode() == ISD::AND)
+ return DAG.getConstant(0, DL, MVT::i1);
+ return Merged.getValue(1);
+}
+
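
For readers unfamiliar with the diamond above, here is a rough C++ sketch of the kind of source that lowers to it (the function name and limb width are illustrative, not from the patch):

    #include <cstdint>

    // Illustrative only: 128-bit addition with explicit carry propagation.
    // Each 64-bit add below becomes a UADDO node; the two partial carry-outs
    // are merged with '|', forming the diamond that combineCarryDiamond
    // rewrites into a single ADDCARRY.
    void add128(uint64_t ALo, uint64_t AHi, uint64_t BLo, uint64_t BHi,
                uint64_t &RLo, uint64_t &RHi, bool &CarryOut) {
      RLo = ALo + BLo;
      bool CarryIn = RLo < ALo;        // carry out of the low limbs (CarryIn)
      uint64_t PartialSum = AHi + BHi; // (uaddo A, B)
      bool COX = PartialSum < AHi;     // PartialCarryOutX
      RHi = PartialSum + CarryIn;      // second uaddo -> AddCarrySum
      bool COY = RHi < PartialSum;     // PartialCarryOutY
      CarryOut = COX | COY;            // CarryOut = (or *, *)
    }
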
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
SDNode *N) {
// fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
@@ -3006,6 +3104,20 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
N1.getOperand(0)));
+ // A - (A & B) -> A & (~B)
+ if (N1.getOpcode() == ISD::AND) {
+ SDValue A = N1.getOperand(0);
+ SDValue B = N1.getOperand(1);
+ if (A != N0)
+ std::swap(A, B);
+ if (A == N0 &&
+ (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
+ SDValue InvB =
+ DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::AND, DL, VT, A, InvB);
+ }
+ }
+
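
The identity holds because every bit set in (A & B) is also set in A, so the subtraction never borrows and simply clears those bits, which is exactly what A & ~B computes. A quick exhaustive check over small values (illustrative only):

    #include <cassert>
    #include <cstdint>

    // Illustrative only: verify A - (A & B) == A & ~B for 8-bit values.
    int main() {
      for (uint32_t A = 0; A < 256; ++A)
        for (uint32_t B = 0; B < 256; ++B)
          assert(A - (A & B) == (A & ~B));
    }
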
// fold (X - (-Y * Z)) -> (X + (Y * Z))
if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
if (N1.getOperand(0).getOpcode() == ISD::SUB &&
@@ -4225,7 +4337,6 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
// If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
// Only do this if the current op isn't legal and the flipped is.
unsigned Opcode = N->getOpcode();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isOperationLegal(Opcode, VT) &&
(N0.isUndef() || DAG.SignBitIsZero(N0)) &&
(N1.isUndef() || DAG.SignBitIsZero(N1))) {
@@ -4543,8 +4654,8 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
// (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
// (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
if (LL == RL && LR == RR) {
- ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
- : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
+ ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
+ : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
if (NewCC != ISD::SETCC_INVALID &&
(!LegalOperations ||
(TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
@@ -4856,7 +4967,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
return true;
}
-bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
+bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!Mask)
return false;
@@ -5092,6 +5203,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue Shuffle = XformToShuffleWithZero(N))
return Shuffle;
+ if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
+ return Combined;
+
// fold (and (or x, C), D) -> D if (C & D) == D
auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
@@ -5238,14 +5352,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
- if (Level >= AfterLegalizeTypes) {
+ if (LegalTypes) {
// Attempt to propagate the AND back up to the leaves which, if they're
// loads, can be combined to narrow loads and the AND node can be removed.
// Perform after legalization so that extend nodes will already be
// combined into the loads.
- if (BackwardsPropagateMask(N, DAG)) {
+ if (BackwardsPropagateMask(N))
return SDValue(N, 0);
- }
}
if (SDValue Combined = visitANDLike(N0, N1, N))
@@ -5787,6 +5900,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDValue Combined = visitORLike(N0, N1, N))
return Combined;
+ if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
+ return Combined;
+
// Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
return BSwap;
@@ -6418,7 +6534,7 @@ static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
// Check if the bytes offsets we are looking at match with either big or
// little endian value loaded. Return true for big endian, false for little
// endian, and None if match failed.
-static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
+static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
int64_t FirstOffset) {
// The endian can be decided only when it is 2 bytes at least.
unsigned Width = ByteOffsets.size();
@@ -6491,7 +6607,6 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
return SDValue();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
return SDValue();
@@ -6499,7 +6614,7 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
// to the same base address. Collect bytes offsets from Base address into
// ByteOffsets.
SDValue CombinedValue;
- SmallVector<int64_t, 4> ByteOffsets(Width, INT64_MAX);
+ SmallVector<int64_t, 8> ByteOffsets(Width, INT64_MAX);
int64_t FirstOffset = INT64_MAX;
StoreSDNode *FirstStore = nullptr;
Optional<BaseIndexOffset> Base;
@@ -6655,13 +6770,6 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
unsigned ByteWidth = VT.getSizeInBits() / 8;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- // Before legalize we can introduce too wide illegal loads which will be later
- // split into legal sized loads. This enables us to combine i64 load by i8
- // patterns to a couple of i32 loads on 32 bit targets.
- if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
- return SDValue();
-
bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
auto MemoryByteOffset = [&] (ByteProvider P) {
assert(P.isMemory() && "Must be a memory byte provider");
@@ -6683,12 +6791,22 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// Check if all the bytes of the OR we are looking at are loaded from the same
// base address. Collect bytes offsets from Base address in ByteOffsets.
- SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
- for (unsigned i = 0; i < ByteWidth; i++) {
+ SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
+ unsigned ZeroExtendedBytes = 0;
+ for (int i = ByteWidth - 1; i >= 0; --i) {
auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
- if (!P || !P->isMemory()) // All the bytes must be loaded from memory
+ if (!P)
return SDValue();
+ if (P->isConstantZero()) {
+ // It's OK for the N most significant bytes to be 0, we can just
+ // zero-extend the load.
+ if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
+ return SDValue();
+ continue;
+ }
+ assert(P->isMemory() && "provenance should either be memory or zero");
+
LoadSDNode *L = P->Load;
assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
!L->isIndexed() &&
@@ -6727,9 +6845,26 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
assert(Base && "Base address of the accessed memory location must be set");
assert(FirstOffset != INT64_MAX && "First byte offset must be set");
+ bool NeedsZext = ZeroExtendedBytes > 0;
+
+ EVT MemVT =
+ EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
+
+ if (!MemVT.isSimple())
+ return SDValue();
+
+ // Before legalization we can introduce loads that are too wide to be legal,
+ // which will later be split into legal-sized loads. This enables us to
+ // combine i64-load-by-i8 patterns into a couple of i32 loads on 32-bit
+ // targets.
+ if (LegalOperations &&
+ !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
+ MemVT))
+ return SDValue();
+
// Check if the bytes of the OR we are looking at match with either big or
// little endian value load
- Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
+ Optional<bool> IsBigEndian = isBigEndian(
+ makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
if (!IsBigEndian.hasValue())
return SDValue();
@@ -6742,7 +6877,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
LoadSDNode *FirstLoad = FirstByteProvider->Load;
// The node we are looking at matches with the pattern, check if we can
- // replace it with a single load and bswap if needed.
+ // replace it with a single (possibly zero-extended) load and bswap + shift if
+ // needed.
// If the load needs byte swap check if the target supports it
bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
@@ -6750,25 +6886,45 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// Before legalize we can introduce illegal bswaps, which will later be
// converted to an explicit bswap sequence. This way we end up with a single
// load and byte shuffling instead of several loads and byte shuffling.
- if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
+ // We do not introduce illegal bswaps when zero-extending as this tends to
+ // introduce too many arithmetic instructions.
+ if (NeedsBswap && (LegalOperations || NeedsZext) &&
+ !TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // If we need to bswap and zero extend, we have to insert a shift. Check that
+ // it is legal.
+ if (NeedsBswap && NeedsZext && LegalOperations &&
+ !TLI.isOperationLegal(ISD::SHL, VT))
return SDValue();
// Check that a load of the wide type is both allowed and fast on the target
bool Fast = false;
- bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
- VT, *FirstLoad->getMemOperand(), &Fast);
+ bool Allowed =
+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
+ *FirstLoad->getMemOperand(), &Fast);
if (!Allowed || !Fast)
return SDValue();
- SDValue NewLoad =
- DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
+ SDValue NewLoad = DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
+ SDLoc(N), VT, Chain, FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), MemVT,
+ FirstLoad->getAlignment());
// Transfer chain users from old loads to the new load.
for (LoadSDNode *L : Loads)
DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
- return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
+ if (!NeedsBswap)
+ return NewLoad;
+
+ SDValue ShiftedLoad =
+ NeedsZext
+ ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
+ DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
+ SDLoc(N), LegalOperations))
+ : NewLoad;
+ return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
}
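
A sketch (hypothetical source, not from the patch) of a pattern the extended MatchLoadCombine can now handle; the two most significant result bytes are known zero, so the OR of byte loads becomes a single zero-extending load instead of being rejected outright:

    #include <cstdint>

    // Illustrative only: reading a 16-bit little-endian value into an i32.
    // Previously all four result bytes had to come from memory; now the two
    // ConstantZero high bytes are accepted and the OR tree becomes a single
    // (zextload i16), with BSWAP + SHL inserted on big-endian mismatches.
    uint32_t Load16LE(const uint8_t *P) {
      return uint32_t(P[0]) | (uint32_t(P[1]) << 8);
    }
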
// If the target has andn, bsl, or a similar bit-select instruction,
@@ -6904,7 +7060,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue LHS, RHS, CC;
if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
- LHS.getValueType().isInteger());
+ LHS.getValueType());
if (!LegalOperations ||
TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
switch (N0Opcode) {
@@ -6964,6 +7120,13 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
DAG.getAllOnesConstant(DL, VT));
}
+ // fold (not (add X, -1)) -> (neg X)
+ if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
+ isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ N0.getOperand(0));
+ }
+
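
This fold relies on the two's complement identity ~Y == -Y - 1, so ~(X - 1) == -(X - 1) - 1 == -X. A minimal check (illustrative only):

    #include <cassert>
    #include <cstdint>

    // Illustrative only: not (add X, -1) == neg X in two's complement.
    int main() {
      for (uint32_t X = 0; X < 1024; ++X)
        assert(~(X + UINT32_MAX) == uint32_t(0) - X); // X + UINT32_MAX == X - 1
    }
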
// fold (xor (and x, y), y) -> (and (not x), y)
if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
SDValue X = N0.getOperand(0);
@@ -7051,6 +7214,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
+ if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
+ return Combined;
+
return SDValue();
}
@@ -7567,8 +7733,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (VT.isVector())
ExtVT = EVT::getVectorVT(*DAG.getContext(),
ExtVT, VT.getVectorNumElements());
- if ((!LegalOperations ||
- TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
+ if (!LegalOperations ||
+ TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
+ TargetLowering::Legal)
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
N0.getOperand(0), DAG.getValueType(ExtVT));
}
@@ -7776,26 +7943,40 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
}
- // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
- // TODO - support non-uniform vector shift amounts.
if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
N0.getOperand(0).getOpcode() == ISD::SRL) {
- if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
+ SDValue InnerShift = N0.getOperand(0);
+ // TODO - support non-uniform vector shift amounts.
+ if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
uint64_t c1 = N001C->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
- EVT InnerShiftVT = N0.getOperand(0).getValueType();
- EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
+ EVT InnerShiftVT = InnerShift.getValueType();
+ EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
+ // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
// This is only valid if the OpSizeInBits + c1 = size of inner shift.
if (c1 + OpSizeInBits == InnerShiftSize) {
- SDLoc DL(N0);
+ SDLoc DL(N);
if (c1 + c2 >= InnerShiftSize)
return DAG.getConstant(0, DL, VT);
- return DAG.getNode(ISD::TRUNCATE, DL, VT,
- DAG.getNode(ISD::SRL, DL, InnerShiftVT,
- N0.getOperand(0).getOperand(0),
- DAG.getConstant(c1 + c2, DL,
- ShiftCountVT)));
+ SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
+ SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
+ InnerShift.getOperand(0), NewShiftAmt);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
+ }
+ // In the more general case, we can clear the high bits after the shift:
+ // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
+ if (N0.hasOneUse() && InnerShift.hasOneUse() &&
+ c1 + c2 < InnerShiftSize) {
+ SDLoc DL(N);
+ SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
+ SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
+ InnerShift.getOperand(0), NewShiftAmt);
+ SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
+ OpSizeInBits - c2),
+ DL, InnerShiftVT);
+ SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
}
}
}
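
A worked instance of the new general case, with values assumed for illustration: x is i64 truncated to i32, c1 = 8 and c2 = 4, so c1 + c2 = 12 < 64 and the fold emits one wide shift plus a mask of the low 32 - 4 = 28 bits:

    #include <cstdint>

    // Illustrative only: srl (trunc (srl x, 8)), 4 on i64 -> i32.
    uint32_t Before(uint64_t X) { return uint32_t(X >> 8) >> 4; }
    uint32_t After(uint64_t X) {
      uint64_t Mask = (1ULL << 28) - 1;  // APInt::getLowBitsSet(64, 32 - 4)
      return uint32_t((X >> 12) & Mask); // both select bits [12, 39] of X
    }
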
@@ -8585,6 +8766,10 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return Chain;
+ // Try transforming N to an indexed store.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -8609,6 +8794,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return CombineTo(N, MLD->getPassThru(), MLD->getChain());
+ // Try transforming N to an indexed load.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -9108,6 +9297,8 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
return SDValue();
+ assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
+
SDLoc DL(N);
const unsigned NumSplits =
DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
@@ -9125,8 +9316,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
- BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(Stride, DL, BasePtr.getValueType()));
+ BasePtr = DAG.getMemBasePlusOffset(BasePtr, Stride, DL);
Loads.push_back(SplitLoad.getValue(0));
Chains.push_back(SplitLoad.getValue(1));
@@ -9365,11 +9555,10 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
SDLoc dl(Ld);
SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
- SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(),
- Ld->getBasePtr(), Ld->getMask(),
- PassThru, Ld->getMemoryVT(),
- Ld->getMemOperand(), ExtLoadType,
- Ld->isExpandingLoad());
+ SDValue NewLoad = DAG.getMaskedLoad(
+ VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
+ PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
+ ExtLoadType, Ld->isExpandingLoad());
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
return NewLoad;
}
@@ -9397,10 +9586,15 @@ static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
// sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
// zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
SDLoc DL(N);
- SDValue NotX = DAG.getNOT(DL, X, VT);
- SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
- auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
- return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
+ unsigned ShCt = VT.getSizeInBits() - 1;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
+ SDValue NotX = DAG.getNOT(DL, X, VT);
+ SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
+ auto ShiftOpcode =
+ N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
+ return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
+ }
}
return SDValue();
}
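
A numeric sanity check of the shift form being guarded here (illustrative only; it assumes arithmetic right shift on signed int, which the DAG's SRA guarantees):

    #include <cassert>
    #include <cstdint>

    // Illustrative only: for i32, setgt X, -1 tests the inverted sign bit, so
    // sext i1 (setgt X, -1) == sra (not X), 31 and the zext form uses srl.
    int main() {
      for (int32_t X : {-7, -1, 0, 1, 42}) {
        int32_t Sext = (X > -1) ? -1 : 0;
        assert(Sext == (~X >> 31));                       // sra form
        assert(uint32_t(X > -1) == (~uint32_t(X) >> 31)); // srl form
      }
    }
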
@@ -9671,6 +9865,29 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
return (Known.Zero | 1).isAllOnesValue();
}
+/// Given an extending node with a pop-count operand, if the target does not
+/// support a pop-count in the narrow source type but does support it in the
+/// destination type, widen the pop-count to the destination type.
+static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
+ assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
+ Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
+
+ SDValue CtPop = Extend->getOperand(0);
+ if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
+ return SDValue();
+
+ EVT VT = Extend->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
+ !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
+ return SDValue();
+
+ // zext (ctpop X) --> ctpop (zext X)
+ SDLoc DL(Extend);
+ SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
+ return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
+}
+
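
The fold is sound because zero-extension introduces no set bits, so the population count is unchanged; a minimal illustration (assumes the GCC/Clang popcount builtin):

    #include <cassert>
    #include <cstdint>

    // Illustrative only: popcount(zext X) == popcount(X), which is why
    // zext (ctpop X) --> ctpop (zext X) is safe when only the wide CTPOP
    // is legal or custom on the target.
    int main() {
      uint16_t X = 0xBEEF;
      assert(__builtin_popcount(uint32_t(X)) == __builtin_popcount(X & 0xFFFFu));
    }
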
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -9921,6 +10138,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
+ if (SDValue NewCtPop = widenCtPop(N, DAG))
+ return NewCtPop;
+
return SDValue();
}
@@ -10067,6 +10287,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
return SCC;
}
+ if (SDValue NewCtPop = widenCtPop(N, DAG))
+ return NewCtPop;
+
return SDValue();
}
@@ -10273,17 +10496,14 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if (DAG.getDataLayout().isBigEndian())
ShAmt = AdjustBigEndianShift(ShAmt);
- EVT PtrType = N0.getOperand(1).getValueType();
uint64_t PtrOff = ShAmt / 8;
unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
SDLoc DL(LN0);
// The original load itself didn't wrap, so an offset within it doesn't.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
- SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
- PtrType, LN0->getBasePtr(),
- DAG.getConstant(PtrOff, DL, PtrType),
- Flags);
+ SDValue NewPtr =
+ DAG.getMemBasePlusOffset(LN0->getBasePtr(), PtrOff, DL, Flags);
AddToWorklist(NewPtr.getNode());
SDValue Load;
@@ -10735,16 +10955,16 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
SDValue VecSrc = N0.getOperand(0);
- EVT SrcVT = VecSrc.getValueType();
- if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
+ EVT VecSrcVT = VecSrc.getValueType();
+ if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
(!LegalOperations ||
- TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
+ TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
SDLoc SL(N);
EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
- unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
- VecSrc, DAG.getConstant(Idx, SL, IdxVT));
+ unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
+ DAG.getConstant(Idx, SL, IdxVT));
}
}
@@ -11299,11 +11519,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
// Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+ bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
@@ -11359,7 +11579,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (isContractableFMUL(N00) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -11373,7 +11594,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
if (isContractableFMUL(N10) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N10.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N10.getOperand(0)),
@@ -11427,7 +11649,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
if (isContractableFMUL(N020) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N020.getValueType())) {
return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
N020.getOperand(0), N020.getOperand(1),
N1, Flags);
@@ -11456,7 +11679,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
if (isContractableFMUL(N002) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N00.getValueType())) {
return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
N002.getOperand(0), N002.getOperand(1),
N1, Flags);
@@ -11471,7 +11695,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (N12.getOpcode() == ISD::FP_EXTEND) {
SDValue N120 = N12.getOperand(0);
if (isContractableFMUL(N120) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N120.getValueType())) {
return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
N120.getOperand(0), N120.getOperand(1),
N0, Flags);
@@ -11489,7 +11714,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (N10.getOpcode() == PreferredFusedOpcode) {
SDValue N102 = N10.getOperand(2);
if (isContractableFMUL(N102) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N10.getValueType())) {
return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
N102.getOperand(0), N102.getOperand(1),
N0, Flags);
@@ -11510,11 +11736,11 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
// Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+ bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
@@ -11579,7 +11805,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (isContractableFMUL(N00) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -11595,7 +11822,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
if (isContractableFMUL(N10) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N10.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -11617,7 +11845,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (N00.getOpcode() == ISD::FNEG) {
SDValue N000 = N00.getOperand(0);
if (isContractableFMUL(N000) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N00.getValueType())) {
return DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -11640,7 +11869,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (N00.getOpcode() == ISD::FP_EXTEND) {
SDValue N000 = N00.getOperand(0);
if (isContractableFMUL(N000) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N000.getValueType())) {
return DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -11671,7 +11901,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
- isContractableFMUL(N1.getOperand(2))) {
+ isContractableFMUL(N1.getOperand(2)) &&
+ N1->hasOneUse()) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -11686,12 +11917,14 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
- if (N0.getOpcode() == PreferredFusedOpcode) {
+ if (N0.getOpcode() == PreferredFusedOpcode &&
+ N0->hasOneUse()) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
if (isContractableFMUL(N020) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N020.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -11716,7 +11949,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
if (isContractableFMUL(N002) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -11736,10 +11970,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fpext (fmul u, v))))
// -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
if (N1.getOpcode() == PreferredFusedOpcode &&
- N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
+ N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
+ N1->hasOneUse()) {
SDValue N120 = N1.getOperand(2).getOperand(0);
if (isContractableFMUL(N120) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N120.getValueType())) {
SDValue N1200 = N120.getOperand(0);
SDValue N1201 = N120.getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -11768,7 +12004,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N101 = CvtSrc.getOperand(1);
SDValue N102 = CvtSrc.getOperand(2);
if (isContractableFMUL(N102) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ CvtSrc.getValueType())) {
SDValue N1020 = N102.getOperand(0);
SDValue N1021 = N102.getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -11812,7 +12049,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// Floating-point multiply-add with intermediate rounding. This can result
@@ -12402,6 +12639,15 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
}
}
+ // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
+ // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
+ if (!TLI.isFNegFree(VT) &&
+ TLI.isNegatibleForFree(SDValue(N, 0), DAG, LegalOperations,
+ ForCodeSize) == 2)
+ return DAG.getNode(ISD::FNEG, DL, VT,
+ TLI.getNegatedExpression(SDValue(N, 0), DAG,
+ LegalOperations, ForCodeSize),
+ Flags);
return SDValue();
}
@@ -12738,7 +12984,7 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
// Assume that libcalls are the smallest code.
// TODO: This restriction should probably be lifted for vectors.
- if (DAG.getMachineFunction().getFunction().hasOptSize())
+ if (ForCodeSize)
return SDValue();
// pow(X, 0.25) --> sqrt(sqrt(X))
@@ -13135,6 +13381,16 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize))
return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+ // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0.
+ // FIXME: This is duplicated in isNegatibleForFree, but isNegatibleForFree
+ // doesn't know it was called from a context with a nsz flag if the input
+ // fsub does not.
+ if (N0.getOpcode() == ISD::FSUB &&
+ (DAG.getTarget().Options.NoSignedZerosFPMath ||
+ N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
+ return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
+ N0.getOperand(0), N->getFlags());
+ }
+
// Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
// constant pool values.
if (!TLI.isFNegFree(VT) &&
@@ -13168,9 +13424,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
if (CFP1) {
APFloat CVal = CFP1->getValueAPF();
CVal.changeSign();
- if (Level >= AfterLegalizeDAG &&
- (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
- TLI.isOperationLegal(ISD::ConstantFP, VT)))
+ if (LegalDAG && (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
+ TLI.isOperationLegal(ISD::ConstantFP, VT)))
return DAG.getNode(
ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
@@ -13423,12 +13678,22 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
EVT VT;
unsigned AS;
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
return false;
VT = LD->getMemoryVT();
AS = LD->getAddressSpace();
- } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
+ if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+ return false;
+ VT = ST->getMemoryVT();
+ AS = ST->getAddressSpace();
+ } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
+ if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+ return false;
+ VT = LD->getMemoryVT();
+ AS = LD->getAddressSpace();
+ } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
return false;
VT = ST->getMemoryVT();
@@ -13462,38 +13727,64 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
VT.getTypeForEVT(*DAG.getContext()), AS);
}
-/// Try turning a load/store into a pre-indexed load/store when the base
-/// pointer is an add or subtract and it has other uses besides the load/store.
-/// After the transformation, the new indexed load/store has effectively folded
-/// the add/subtract in and all of its other uses are redirected to the
-/// new load/store.
-bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
- if (Level < AfterLegalizeDAG)
- return false;
-
- bool isLoad = true;
- SDValue Ptr;
- EVT VT;
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
+ bool &IsLoad, bool &IsMasked, SDValue &Ptr,
+ const TargetLowering &TLI) {
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
if (LD->isIndexed())
return false;
- VT = LD->getMemoryVT();
- if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
- !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
+ EVT VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
return false;
Ptr = LD->getBasePtr();
- } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
if (ST->isIndexed())
return false;
- VT = ST->getMemoryVT();
- if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
- !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
+ EVT VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
return false;
Ptr = ST->getBasePtr();
- isLoad = false;
+ IsLoad = false;
+ } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ EVT VT = LD->getMemoryVT();
+ if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
+ !TLI.isIndexedMaskedLoadLegal(Dec, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ IsMasked = true;
+ } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ EVT VT = ST->getMemoryVT();
+ if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
+ !TLI.isIndexedMaskedStoreLegal(Dec, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ IsLoad = false;
+ IsMasked = true;
} else {
return false;
}
+ return true;
+}
+
+/// Try turning a load/store into a pre-indexed load/store when the base
+/// pointer is an add or subtract and it has other uses besides the load/store.
+/// After the transformation, the new indexed load/store has effectively folded
+/// the add/subtract in and all of its other uses are redirected to the
+/// new load/store.
+bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
+
+ bool IsLoad = true;
+ bool IsMasked = false;
+ SDValue Ptr;
+ if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
+ Ptr, TLI))
+ return false;
// If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
// out. There is no reason to make this a preinc/predec.
@@ -13535,8 +13826,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
return false;
// Check #2.
- if (!isLoad) {
- SDValue Val = cast<StoreSDNode>(N)->getValue();
+ if (!IsLoad) {
+ SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
+ : cast<StoreSDNode>(N)->getValue();
// Would require a copy.
if (Val == BasePtr)
@@ -13612,18 +13904,26 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
return false;
SDValue Result;
- if (isLoad)
- Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
- BasePtr, Offset, AM);
- else
- Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
- BasePtr, Offset, AM);
+ if (!IsMasked) {
+ if (IsLoad)
+ Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
+ else
+ Result =
+ DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
+ } else {
+ if (IsLoad)
+ Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
+ Offset, AM);
+ else
+ Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
+ Offset, AM);
+ }
++PreIndexedNodes;
++NodesCombined;
LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
Result.getNode()->dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
- if (isLoad) {
+ if (IsLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
} else {
@@ -13677,7 +13977,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// We can now generate the new expression.
SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
- SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
+ SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
SDValue NewUse = DAG.getNode(Opcode,
DL,
@@ -13687,7 +13987,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
}
// Replace the uses of Ptr with uses of the updated base value.
- DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
+ DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
deleteAndRecombine(Ptr.getNode());
AddToWorklist(Result.getNode());
@@ -13702,29 +14002,12 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
if (Level < AfterLegalizeDAG)
return false;
- bool isLoad = true;
+ bool IsLoad = true;
+ bool IsMasked = false;
SDValue Ptr;
- EVT VT;
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
- if (LD->isIndexed())
- return false;
- VT = LD->getMemoryVT();
- if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
- !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
- return false;
- Ptr = LD->getBasePtr();
- } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
- if (ST->isIndexed())
- return false;
- VT = ST->getMemoryVT();
- if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
- !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
- return false;
- Ptr = ST->getBasePtr();
- isLoad = false;
- } else {
+ if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad, IsMasked,
+ Ptr, TLI))
return false;
- }
if (Ptr.getNode()->hasOneUse())
return false;
@@ -13760,7 +14043,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
// If all the uses are load / store addresses, then don't do the
// transformation.
- if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
+ if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
bool RealUse = false;
for (SDNode *UseUse : Use->uses()) {
if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
@@ -13786,18 +14069,24 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
Worklist.push_back(Op);
if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
!SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
- SDValue Result = isLoad
- ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
- BasePtr, Offset, AM)
- : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
- BasePtr, Offset, AM);
+ SDValue Result;
+ if (!IsMasked)
+ Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
+ Offset, AM)
+ : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
+ BasePtr, Offset, AM);
+ else
+ Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
+ BasePtr, Offset, AM)
+ : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
+ BasePtr, Offset, AM);
++PostIndexedNodes;
++NodesCombined;
LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
dbgs() << '\n');
WorklistRemover DeadNodes(*this);
- if (isLoad) {
+ if (IsLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
} else {
@@ -13809,7 +14098,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
// Replace the uses of Use with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
- Result.getValue(isLoad ? 1 : 0));
+ Result.getValue(IsLoad ? 1 : 0));
deleteAndRecombine(Op);
return true;
}
@@ -13923,8 +14212,8 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
// the stored value). With Offset=n (for n > 0) the loaded value starts at the
// n-th least significant byte of the stored value.
if (DAG.getDataLayout().isBigEndian())
- Offset = (STMemType.getStoreSizeInBits() -
- LDMemType.getStoreSizeInBits()) / 8 - Offset;
+ Offset = ((int64_t)STMemType.getStoreSizeInBits() -
+ (int64_t)LDMemType.getStoreSizeInBits()) / 8 - Offset;
// Check that the stored value covers all bits that are loaded.
bool STCoversLD =
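
The int64_t casts above matter when the stored type is narrower than the loaded one; under unsigned arithmetic the subtraction would wrap instead of going negative (illustrative check):

    #include <cassert>
    #include <cstdint>

    // Illustrative only: ST 16 bits wide, LD 32 bits wide.
    int main() {
      uint64_t STBits = 16, LDBits = 32;
      assert(((int64_t)STBits - (int64_t)LDBits) / 8 == -2); // intended
      // (STBits - LDBits) / 8 would wrap to 0x1FFFFFFFFFFFFFFE unsigned.
    }
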
@@ -14066,7 +14355,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
return V;
// Try to infer better alignment information than the load already has.
- if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+ if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
SDValue NewLoad = DAG.getExtLoad(
@@ -14786,8 +15075,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
SDValue Ptr = St->getBasePtr();
if (StOffset) {
SDLoc DL(IVal);
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
- Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, StOffset, DL);
NewAlign = MinAlign(NewAlign, StOffset);
}
@@ -14898,10 +15186,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
return SDValue();
- SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
- Ptr.getValueType(), Ptr,
- DAG.getConstant(PtrOff, SDLoc(LD),
- Ptr.getValueType()));
+ SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, PtrOff, SDLoc(LD));
SDValue NewLD =
DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
@@ -15081,7 +15366,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
// The latest Node in the DAG.
SDLoc DL(StoreNodes[0].MemNode);
- int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
+ TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
unsigned SizeInBits = NumStores * ElementSizeBits;
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
@@ -15466,7 +15751,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
Attribute::NoImplicitFloat);
// This function cannot currently deal with non-byte-sized memory sizes.
- if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
+ if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
return false;
if (!MemVT.isSimple())
@@ -16015,6 +16300,9 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
if (Value.getOpcode() == ISD::TargetConstantFP)
return SDValue();
+ if (!ISD::isNormalStore(ST))
+ return SDValue();
+
SDLoc DL(ST);
SDValue Chain = ST->getChain();
@@ -16075,8 +16363,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
ST->getAlignment(), MMOFlags, AAInfo);
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(4, DL, Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, 4, DL);
Alignment = MinAlign(Alignment, 4U);
SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
@@ -16111,8 +16398,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
DAG, *ST->getMemOperand())) {
return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
- ST->getPointerInfo(), ST->getAlignment(),
- ST->getMemOperand()->getFlags(), ST->getAAInfo());
+ ST->getMemOperand());
}
}
@@ -16121,7 +16407,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return Chain;
// Try to infer better alignment information than the store already has.
- if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
+ if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
SDValue NewStore =
@@ -16451,9 +16737,7 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
// Lower value store.
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
ST->getAlignment(), MMOFlags, AAInfo);
- Ptr =
- DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, HalfValBitSize / 8, DL);
// Higher value store.
SDValue St1 =
DAG.getStore(St0, DL, Hi, Ptr,
@@ -16464,11 +16748,15 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
/// Convert a disguised subvector insertion into a shuffle:
SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
+ assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
+ "Expected insert_vector_elt");
SDValue InsertVal = N->getOperand(1);
SDValue Vec = N->getOperand(0);
- // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex)
- // --> (vector_shuffle X, Y)
+ // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
+ // InsIndex)
+ // --> (vector_shuffle X, Y) and variations where shuffle operands may be
+ // CONCAT_VECTORS.
if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isa<ConstantSDNode>(InsertVal.getOperand(1))) {
@@ -16481,18 +16769,47 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
// Vec's operand 0 is using indices from 0 to N-1 and
// operand 1 from N to 2N - 1, where N is the number of
// elements in the vectors.
- int XOffset = -1;
- if (InsertVal.getOperand(0) == X) {
- XOffset = 0;
- } else if (InsertVal.getOperand(0) == Y) {
- XOffset = X.getValueType().getVectorNumElements();
+ SDValue InsertVal0 = InsertVal.getOperand(0);
+ int ElementOffset = -1;
+
+ // We explore the inputs of the shuffle in order to see if we find the
+ // source of the extract_vector_elt. If so, we can use it to modify the
+ // shuffle rather than perform an insert_vector_elt.
+ SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
+ ArgWorkList.emplace_back(Mask.size(), Y);
+ ArgWorkList.emplace_back(0, X);
+
+ while (!ArgWorkList.empty()) {
+ int ArgOffset;
+ SDValue ArgVal;
+ std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
+
+ if (ArgVal == InsertVal0) {
+ ElementOffset = ArgOffset;
+ break;
+ }
+
+ // Peek through concat_vector.
+ if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
+ int CurrentArgOffset =
+ ArgOffset + ArgVal.getValueType().getVectorNumElements();
+ int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
+ for (SDValue Op : reverse(ArgVal->ops())) {
+ CurrentArgOffset -= Step;
+ ArgWorkList.emplace_back(CurrentArgOffset, Op);
+ }
+
+ // Make sure we went through all the elements and did not screw up index
+ // computation.
+ assert(CurrentArgOffset == ArgOffset);
+ }
}
- if (XOffset != -1) {
+ if (ElementOffset != -1) {
SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
- NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue();
+ NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
assert(NewMask[InsIndex] <
(int)(2 * Vec.getValueType().getVectorNumElements()) &&
NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
@@ -16562,13 +16879,14 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue EltNo = N->getOperand(2);
SDLoc DL(N);
- // If the inserted element is an UNDEF, just use the input vector.
- if (InVal.isUndef())
- return InVec;
-
EVT VT = InVec.getValueType();
unsigned NumElts = VT.getVectorNumElements();
+ // Insert into out-of-bounds element is undefined.
+ if (auto *IndexC = dyn_cast<ConstantSDNode>(EltNo))
+ if (IndexC->getZExtValue() >= VT.getVectorNumElements())
+ return DAG.getUNDEF(VT);
+
// Remove redundant insertions:
// (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
@@ -16683,7 +17001,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
// operand can't represent this new access since the offset is variable.
MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
}
- NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
+ NewPtr = DAG.getMemBasePlusOffset(NewPtr, Offset, DL);
// The replacement we need to do here is a little tricky: we need to
// replace an extractelement of a load with a load.
@@ -16723,8 +17041,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
AddToWorklist(EVE);
// Since we're explicitly calling ReplaceAllUses, add the new node to the
// worklist explicitly as well.
- AddUsersToWorklist(Load.getNode()); // Add users too
- AddToWorklist(Load.getNode());
+ AddToWorklistWithUsers(Load.getNode());
++OpsNarrowed;
return SDValue(EVE, 0);
}
@@ -18239,22 +18556,61 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
return DAG.getBitcast(NVT, NewExtract);
}
}
- // TODO - handle (DestNumElts % SrcNumElts) == 0
+ if ((DestNumElts % SrcNumElts) == 0) {
+ unsigned DestSrcRatio = DestNumElts / SrcNumElts;
+ if ((NVT.getVectorNumElements() % DestSrcRatio) == 0) {
+ unsigned NewExtNumElts = NVT.getVectorNumElements() / DestSrcRatio;
+ EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(),
+ SrcVT.getScalarType(), NewExtNumElts);
+ if ((N->getConstantOperandVal(1) % DestSrcRatio) == 0 &&
+ TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
+ unsigned IndexValScaled = N->getConstantOperandVal(1) / DestSrcRatio;
+ SDLoc DL(N);
+ SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
+ SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
+ V.getOperand(0), NewIndex);
+ return DAG.getBitcast(NVT, NewExtract);
+ }
+ }
+ }
}
- // Combine:
- // (extract_subvec (concat V1, V2, ...), i)
- // Into:
- // Vi if possible
- // Only operand 0 is checked as 'concat' assumes all inputs of the same
- // type.
- if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) &&
- V.getOperand(0).getValueType() == NVT) {
- unsigned Idx = N->getConstantOperandVal(1);
- unsigned NumElems = NVT.getVectorNumElements();
- assert((Idx % NumElems) == 0 &&
- "IDX in concat is not a multiple of the result vector length.");
- return V->getOperand(Idx / NumElems);
+ if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index)) {
+ EVT ConcatSrcVT = V.getOperand(0).getValueType();
+ assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
+ "Concat and extract subvector do not change element type");
+
+ unsigned ExtIdx = N->getConstantOperandVal(1);
+ unsigned ExtNumElts = NVT.getVectorNumElements();
+ assert(ExtIdx % ExtNumElts == 0 &&
+         "Extract index is not a multiple of the extracted vector length.");
+
+ unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorNumElements();
+ unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
+
+ // If the concatenated source types match this extract, it's a direct
+ // simplification:
+ // extract_subvec (concat V1, V2, ...), i --> Vi
+ if (ConcatSrcNumElts == ExtNumElts)
+ return V.getOperand(ConcatOpIdx);
+
+  // If the concatenated source vectors are a multiple of the length of this
+  // extract, then extract a fraction of one of those source vectors directly
+  // from a concat operand. Example:
+  //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
+ // v2i8 extract_subvec v8i8 Y, 6
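+  //   (here ConcatOpIdx = 14 / 8 = 1 and NewExtIdx = 14 - 1 * 8 = 6)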
+ if (ConcatSrcNumElts % ExtNumElts == 0) {
+ SDLoc DL(N);
+ unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
+ assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
+ "Trying to extract from >1 concat operand?");
+ assert(NewExtIdx % ExtNumElts == 0 &&
+           "Extract index is not a multiple of the extracted vector length.");
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue NewIndexC = DAG.getConstant(NewExtIdx, DL, IdxTy);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
+ V.getOperand(ConcatOpIdx), NewIndexC);
+ }
}
V = peekThroughBitcasts(V);
@@ -18962,6 +19318,30 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return V;
}
+ // A shuffle of a concat of the same narrow vector can be reduced to use
+ // only low-half elements of a concat with undef:
+ // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
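+  // e.g. (illustrative) with v2 X: mask <3,1,0,2> becomes <1,1,0,0>.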
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
+ N0.getNumOperands() == 2 &&
+ N0.getOperand(0) == N0.getOperand(1)) {
+ int HalfNumElts = (int)NumElts / 2;
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= HalfNumElts) {
+ assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
+ Idx -= HalfNumElts;
+ }
+ NewMask.push_back(Idx);
+ }
+ if (TLI.isShuffleMaskLegal(NewMask, VT)) {
+ SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
+ SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
+ N0.getOperand(0), UndefVec);
+ return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
+ }
+ }
+
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
@@ -19446,8 +19826,10 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
int EltIdx = i / Split;
int SubIdx = i % Split;
SDValue Elt = RHS.getOperand(EltIdx);
+ // X & undef --> 0 (not undef). So this lane must be converted to choose
+ // from the zero constant vector (same as if the element had all 0-bits).
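+    // e.g. for (and X, (build_vector C, undef)) the undef lane gets mask
+    // index i + NumSubElts, selecting the matching lane of the zero vector.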
if (Elt.isUndef()) {
- Indices.push_back(-1);
+ Indices.push_back(i + NumSubElts);
continue;
}
@@ -19460,14 +19842,10 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
return SDValue();
// Extract the sub element from the constant bit mask.
- if (DAG.getDataLayout().isBigEndian()) {
- Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
- } else {
- Bits.lshrInPlace(SubIdx * NumSubBits);
- }
-
- if (Split > 1)
- Bits = Bits.trunc(NumSubBits);
+ if (DAG.getDataLayout().isBigEndian())
+ Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
+ else
+ Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
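+    // e.g. (illustrative) Split == 2, NumSubBits == 8: SubIdx 0 takes bits
+    // [7:0] on little-endian targets and bits [15:8] on big-endian targets.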
if (Bits.isAllOnesValue())
Indices.push_back(i);
@@ -19910,22 +20288,28 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
- SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
- SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
- AddToWorklist(Shift.getNode());
-
- if (XType.bitsGT(AType)) {
- Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
+ SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
       AddToWorklist(Shift.getNode());
-    }
- if (CC == ISD::SETGT)
- Shift = DAG.getNOT(DL, Shift, AType);
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorklist(Shift.getNode());
+ }
- return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ if (CC == ISD::SETGT)
+ Shift = DAG.getNOT(DL, Shift, AType);
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
}
- SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
+ unsigned ShCt = XType.getSizeInBits() - 1;
+ if (TLI.shouldAvoidTransformToShift(XType, ShCt))
+ return SDValue();
+
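+  // e.g. for i32 X, ShCt == 31: (sra X, 31) is all-ones iff X is negative.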
+ SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
AddToWorklist(Shift.getNode());
@@ -20035,31 +20419,29 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
// when the condition can be materialized as an all-ones register. Any
// single bit-test can be materialized as an all-ones register with
// shift-left and shift-right-arith.
- // TODO: The operation legality checks could be loosened to include "custom",
- // but that may cause regressions for targets that do not have shift
- // instructions.
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
- N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2) &&
- TLI.isOperationLegal(ISD::SHL, VT) &&
- TLI.isOperationLegal(ISD::SRA, VT)) {
+ N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
SDValue AndLHS = N0->getOperand(0);
auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
// Shift the tested bit over the sign bit.
const APInt &AndMask = ConstAndRHS->getAPIntValue();
- SDValue ShlAmt =
- DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
- getShiftAmountTy(AndLHS.getValueType()));
- SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
-
- // Now arithmetic right shift it all the way over, so the result is either
- // all-ones, or zero.
- SDValue ShrAmt =
- DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
- getShiftAmountTy(Shl.getValueType()));
- SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
-
- return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+ unsigned ShCt = AndMask.getBitWidth() - 1;
+ if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
+ SDValue ShlAmt =
+ DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
+ getShiftAmountTy(AndLHS.getValueType()));
+ SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
+
+ // Now arithmetic right shift it all the way over, so the result is
+ // either all-ones, or zero.
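+        // e.g. (illustrative) i8 X, mask 0x10: (sra (shl X, 3), 7) is
+        // all-ones iff bit 4 of X is set.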
+ SDValue ShrAmt =
+ DAG.getConstant(ShCt, SDLoc(Shl),
+ getShiftAmountTy(Shl.getValueType()));
+ SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
+
+ return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+ }
}
}
@@ -20073,7 +20455,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
(!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
if (Swap) {
- CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
+ CC = ISD::getSetCCInverse(CC, CmpOpVT);
std::swap(N2C, N3C);
}
@@ -20101,10 +20483,13 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (N2C->isOne())
return Temp;
+ unsigned ShCt = N2C->getAPIntValue().logBase2();
+ if (TLI.shouldAvoidTransformToShift(VT, ShCt))
+ return SDValue();
+
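+  // e.g. (illustrative) N2C == 8: Temp holds the 0/1 setcc value, so the
+  // shift below by log2(8) == 3 yields 0 or 8.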
// shl setcc result by log2 n2c
return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
- DAG.getConstant(N2C->getAPIntValue().logBase2(),
- SDLoc(Temp),
+ DAG.getConstant(ShCt, SDLoc(Temp),
getShiftAmountTy(Temp.getValueType())));
}
@@ -20237,7 +20622,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
/// Result = N X_i + X_i (N - N A X_i)
SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
SDNodeFlags Flags) {
- if (Level >= AfterLegalizeDAG)
+ if (LegalDAG)
return SDValue();
// TODO: Handle half and/or extended types?
@@ -20376,7 +20761,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
/// Op can be zero.
SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
bool Reciprocal) {
- if (Level >= AfterLegalizeDAG)
+ if (LegalDAG)
return SDValue();
// TODO: Handle half and/or extended types?
@@ -20411,9 +20796,8 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
SDLoc DL(Op);
EVT CCVT = getSetCCResultType(VT);
ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
- const Function &F = DAG.getMachineFunction().getFunction();
- Attribute Denorms = F.getFnAttribute("denormal-fp-math");
- if (Denorms.getValueAsString().equals("ieee")) {
+ DenormalMode DenormMode = DAG.getDenormalMode(VT);
+ if (DenormMode == DenormalMode::IEEE) {
// fabs(X) < SmallestNormal ? 0.0 : Est
const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);