diff options
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
20 files changed, 4392 insertions, 2770 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e8950b58d42d..e5bc08b9280a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -131,6 +131,7 @@ namespace { const TargetLowering &TLI; CombineLevel Level; CodeGenOpt::Level OptLevel; + bool LegalDAG = false; bool LegalOperations = false; bool LegalTypes = false; bool ForCodeSize; @@ -179,6 +180,12 @@ namespace { AddToWorklist(Node); } + /// Convenient shorthand to add a node and all of its user to the worklist. + void AddToWorklistWithUsers(SDNode *N) { + AddUsersToWorklist(N); + AddToWorklist(N); + } + // Prune potentially dangling nodes. This is called after // any visit to a node, but should also be called during a visit after any // failed combine which may have created a DAG node. @@ -217,14 +224,16 @@ namespace { DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) { - ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); + ForCodeSize = DAG.shouldOptForSize(); MaximumLegalStoreInBits = 0; + // We use the minimum store size here, since that's all we can guarantee + // for the scalable vector types. for (MVT VT : MVT::all_valuetypes()) if (EVT(VT).isSimple() && VT != MVT::Other && TLI.isTypeLegal(EVT(VT)) && - VT.getSizeInBits() >= MaximumLegalStoreInBits) - MaximumLegalStoreInBits = VT.getSizeInBits(); + VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits) + MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize(); } void ConsiderForPruning(SDNode *N) { @@ -622,7 +631,7 @@ namespace { ConstantSDNode *Mask, SDNode *&NodeToMask); /// Attempt to propagate a given AND node back to load leaves so that they /// can be combined into narrow loads. - bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG); + bool BackwardsPropagateMask(SDNode *N); /// Helper function for MergeConsecutiveStores which merges the /// component store chains. @@ -1026,8 +1035,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); // Push the new node and any (possibly new) users onto the worklist. - AddToWorklist(TLO.New.getNode()); - AddUsersToWorklist(TLO.New.getNode()); + AddToWorklistWithUsers(TLO.New.getNode()); // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to @@ -1393,6 +1401,7 @@ bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { void DAGCombiner::Run(CombineLevel AtLevel) { // set the instance variables, so that the various visit routines may use it. Level = AtLevel; + LegalDAG = Level >= AfterLegalizeDAG; LegalOperations = Level >= AfterLegalizeVectorOps; LegalTypes = Level >= AfterLegalizeTypes; @@ -1419,14 +1428,13 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // If this combine is running after legalizing the DAG, re-legalize any // nodes pulled off the worklist. - if (Level == AfterLegalizeDAG) { + if (LegalDAG) { SmallSetVector<SDNode *, 16> UpdatedNodes; bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); - for (SDNode *LN : UpdatedNodes) { - AddUsersToWorklist(LN); - AddToWorklist(LN); - } + for (SDNode *LN : UpdatedNodes) + AddToWorklistWithUsers(LN); + if (!NIsValid) continue; } @@ -2800,6 +2808,96 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, return SDValue(); } +// If we are facing some sort of diamond carry/borrow in/out pattern try to +// match patterns like: +// +// (uaddo A, B) CarryIn +// | \ | +// | \ | +// PartialSum PartialCarryOutX / +// | | / +// | ____|____________/ +// | / | +// (uaddo *, *) \________ +// | \ \ +// | \ | +// | PartialCarryOutY | +// | \ | +// | \ / +// AddCarrySum | ______/ +// | / +// CarryOut = (or *, *) +// +// And generate ADDCARRY (or SUBCARRY) with two result values: +// +// {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn) +// +// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with +// a single path for carry/borrow out propagation: +static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, + const TargetLowering &TLI, SDValue Carry0, + SDValue Carry1, SDNode *N) { + if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1) + return SDValue(); + unsigned Opcode = Carry0.getOpcode(); + if (Opcode != Carry1.getOpcode()) + return SDValue(); + if (Opcode != ISD::UADDO && Opcode != ISD::USUBO) + return SDValue(); + + // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the + // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in + // the above ASCII art.) + if (Carry1.getOperand(0) != Carry0.getValue(0) && + Carry1.getOperand(1) != Carry0.getValue(0)) + std::swap(Carry0, Carry1); + if (Carry1.getOperand(0) != Carry0.getValue(0) && + Carry1.getOperand(1) != Carry0.getValue(0)) + return SDValue(); + + // The carry in value must be on the righthand side for subtraction. + unsigned CarryInOperandNum = + Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0; + if (Opcode == ISD::USUBO && CarryInOperandNum != 1) + return SDValue(); + SDValue CarryIn = Carry1.getOperand(CarryInOperandNum); + + unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY; + if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType())) + return SDValue(); + + // Verify that the carry/borrow in is plausibly a carry/borrow bit. + // TODO: make getAsCarry() aware of how partial carries are merged. + if (CarryIn.getOpcode() != ISD::ZERO_EXTEND) + return SDValue(); + CarryIn = CarryIn.getOperand(0); + if (CarryIn.getValueType() != MVT::i1) + return SDValue(); + + SDLoc DL(N); + SDValue Merged = + DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0), + Carry0.getOperand(1), CarryIn); + + // Please note that because we have proven that the result of the UADDO/USUBO + // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can + // therefore prove that if the first UADDO/USUBO overflows, the second + // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the + // maximum value. + // + // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry + // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow) + // + // This is important because it means that OR and XOR can be used to merge + // carry flags; and that AND can return a constant zero. + // + // TODO: match other operations that can merge flags (ADD, etc) + DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0)); + if (N->getOpcode() == ISD::AND) + return DAG.getConstant(0, DL, MVT::i1); + return Merged.getValue(1); +} + SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N) { // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry. @@ -3006,6 +3104,20 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1), N1.getOperand(0))); + // A - (A & B) -> A & (~B) + if (N1.getOpcode() == ISD::AND) { + SDValue A = N1.getOperand(0); + SDValue B = N1.getOperand(1); + if (A != N0) + std::swap(A, B); + if (A == N0 && + (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) { + SDValue InvB = + DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT)); + return DAG.getNode(ISD::AND, DL, VT, A, InvB); + } + } + // fold (X - (-Y * Z)) -> (X + (Y * Z)) if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) { if (N1.getOperand(0).getOpcode() == ISD::SUB && @@ -4225,7 +4337,6 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX. // Only do this if the current op isn't legal and the flipped is. unsigned Opcode = N->getOpcode(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.isOperationLegal(Opcode, VT) && (N0.isUndef() || DAG.SignBitIsZero(N0)) && (N1.isUndef() || DAG.SignBitIsZero(N1))) { @@ -4543,8 +4654,8 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC) // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC) if (LL == RL && LR == RR) { - ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger) - : ISD::getSetCCOrOperation(CC0, CC1, IsInteger); + ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT) + : ISD::getSetCCOrOperation(CC0, CC1, OpVT); if (NewCC != ISD::SETCC_INVALID && (!LegalOperations || (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) && @@ -4856,7 +4967,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N, return true; } -bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) { +bool DAGCombiner::BackwardsPropagateMask(SDNode *N) { auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1)); if (!Mask) return false; @@ -5092,6 +5203,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue Shuffle = XformToShuffleWithZero(N)) return Shuffle; + if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N)) + return Combined; + // fold (and (or x, C), D) -> D if (C & D) == D auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) { return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue()); @@ -5238,14 +5352,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } - if (Level >= AfterLegalizeTypes) { + if (LegalTypes) { // Attempt to propagate the AND back up to the leaves which, if they're // loads, can be combined to narrow loads and the AND node can be removed. // Perform after legalization so that extend nodes will already be // combined into the loads. - if (BackwardsPropagateMask(N, DAG)) { + if (BackwardsPropagateMask(N)) return SDValue(N, 0); - } } if (SDValue Combined = visitANDLike(N0, N1, N)) @@ -5787,6 +5900,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (SDValue Combined = visitORLike(N0, N1, N)) return Combined; + if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N)) + return Combined; + // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) if (SDValue BSwap = MatchBSwapHWord(N, N0, N1)) return BSwap; @@ -6418,7 +6534,7 @@ static unsigned BigEndianByteAt(unsigned BW, unsigned i) { // Check if the bytes offsets we are looking at match with either big or // little endian value loaded. Return true for big endian, false for little // endian, and None if match failed. -static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets, +static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets, int64_t FirstOffset) { // The endian can be decided only when it is 2 bytes at least. unsigned Width = ByteOffsets.size(); @@ -6491,7 +6607,6 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) return SDValue(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT)) return SDValue(); @@ -6499,7 +6614,7 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { // to the same base address. Collect bytes offsets from Base address into // ByteOffsets. SDValue CombinedValue; - SmallVector<int64_t, 4> ByteOffsets(Width, INT64_MAX); + SmallVector<int64_t, 8> ByteOffsets(Width, INT64_MAX); int64_t FirstOffset = INT64_MAX; StoreSDNode *FirstStore = nullptr; Optional<BaseIndexOffset> Base; @@ -6655,13 +6770,6 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { return SDValue(); unsigned ByteWidth = VT.getSizeInBits() / 8; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - // Before legalize we can introduce too wide illegal loads which will be later - // split into legal sized loads. This enables us to combine i64 load by i8 - // patterns to a couple of i32 loads on 32 bit targets. - if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT)) - return SDValue(); - bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian(); auto MemoryByteOffset = [&] (ByteProvider P) { assert(P.isMemory() && "Must be a memory byte provider"); @@ -6683,12 +6791,22 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // Check if all the bytes of the OR we are looking at are loaded from the same // base address. Collect bytes offsets from Base address in ByteOffsets. - SmallVector<int64_t, 4> ByteOffsets(ByteWidth); - for (unsigned i = 0; i < ByteWidth; i++) { + SmallVector<int64_t, 8> ByteOffsets(ByteWidth); + unsigned ZeroExtendedBytes = 0; + for (int i = ByteWidth - 1; i >= 0; --i) { auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true); - if (!P || !P->isMemory()) // All the bytes must be loaded from memory + if (!P) return SDValue(); + if (P->isConstantZero()) { + // It's OK for the N most significant bytes to be 0, we can just + // zero-extend the load. + if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i))) + return SDValue(); + continue; + } + assert(P->isMemory() && "provenance should either be memory or zero"); + LoadSDNode *L = P->Load; assert(L->hasNUsesOfValue(1, 0) && L->isSimple() && !L->isIndexed() && @@ -6727,9 +6845,26 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { assert(Base && "Base address of the accessed memory location must be set"); assert(FirstOffset != INT64_MAX && "First byte offset must be set"); + bool NeedsZext = ZeroExtendedBytes > 0; + + EVT MemVT = + EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8); + + if (!MemVT.isSimple()) + return SDValue(); + + // Before legalize we can introduce too wide illegal loads which will be later + // split into legal sized loads. This enables us to combine i64 load by i8 + // patterns to a couple of i32 loads on 32 bit targets. + if (LegalOperations && + !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, + MemVT)) + return SDValue(); + // Check if the bytes of the OR we are looking at match with either big or // little endian value load - Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset); + Optional<bool> IsBigEndian = isBigEndian( + makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset); if (!IsBigEndian.hasValue()) return SDValue(); @@ -6742,7 +6877,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { LoadSDNode *FirstLoad = FirstByteProvider->Load; // The node we are looking at matches with the pattern, check if we can - // replace it with a single load and bswap if needed. + // replace it with a single (possibly zero-extended) load and bswap + shift if + // needed. // If the load needs byte swap check if the target supports it bool NeedsBswap = IsBigEndianTarget != *IsBigEndian; @@ -6750,25 +6886,45 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // Before legalize we can introduce illegal bswaps which will be later // converted to an explicit bswap sequence. This way we end up with a single // load and byte shuffling instead of several loads and byte shuffling. - if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT)) + // We do not introduce illegal bswaps when zero-extending as this tends to + // introduce too many arithmetic instructions. + if (NeedsBswap && (LegalOperations || NeedsZext) && + !TLI.isOperationLegal(ISD::BSWAP, VT)) + return SDValue(); + + // If we need to bswap and zero extend, we have to insert a shift. Check that + // it is legal. + if (NeedsBswap && NeedsZext && LegalOperations && + !TLI.isOperationLegal(ISD::SHL, VT)) return SDValue(); // Check that a load of the wide type is both allowed and fast on the target bool Fast = false; - bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), - VT, *FirstLoad->getMemOperand(), &Fast); + bool Allowed = + TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, + *FirstLoad->getMemOperand(), &Fast); if (!Allowed || !Fast) return SDValue(); - SDValue NewLoad = - DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), FirstLoad->getAlignment()); + SDValue NewLoad = DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, + SDLoc(N), VT, Chain, FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), MemVT, + FirstLoad->getAlignment()); // Transfer chain users from old loads to the new load. for (LoadSDNode *L : Loads) DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1)); - return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad; + if (!NeedsBswap) + return NewLoad; + + SDValue ShiftedLoad = + NeedsZext + ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad, + DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT, + SDLoc(N), LegalOperations)) + : NewLoad; + return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad); } // If the target has andn, bsl, or a similar bit-select instruction, @@ -6904,7 +7060,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue LHS, RHS, CC; if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), - LHS.getValueType().isInteger()); + LHS.getValueType()); if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) { switch (N0Opcode) { @@ -6964,6 +7120,13 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { DAG.getAllOnesConstant(DL, VT)); } + // fold (not (add X, -1)) -> (neg X) + if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD && + isAllOnesOrAllOnesSplat(N0.getOperand(1))) { + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), + N0.getOperand(0)); + } + // fold (xor (and x, y), y) -> (and (not x), y) if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) { SDValue X = N0.getOperand(0); @@ -7051,6 +7214,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); + if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N)) + return Combined; + return SDValue(); } @@ -7567,8 +7733,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (VT.isVector()) ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, VT.getVectorNumElements()); - if ((!LegalOperations || - TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))) + if (!LegalOperations || + TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) == + TargetLowering::Legal) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0), DAG.getValueType(ExtVT)); } @@ -7776,26 +7943,40 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { } } - // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) - // TODO - support non-uniform vector shift amounts. if (N1C && N0.getOpcode() == ISD::TRUNCATE && N0.getOperand(0).getOpcode() == ISD::SRL) { - if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) { + SDValue InnerShift = N0.getOperand(0); + // TODO - support non-uniform vector shift amounts. + if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) { uint64_t c1 = N001C->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); - EVT InnerShiftVT = N0.getOperand(0).getValueType(); - EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType(); + EVT InnerShiftVT = InnerShift.getValueType(); + EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType(); uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); + // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2))) // This is only valid if the OpSizeInBits + c1 = size of inner shift. if (c1 + OpSizeInBits == InnerShiftSize) { - SDLoc DL(N0); + SDLoc DL(N); if (c1 + c2 >= InnerShiftSize) return DAG.getConstant(0, DL, VT); - return DAG.getNode(ISD::TRUNCATE, DL, VT, - DAG.getNode(ISD::SRL, DL, InnerShiftVT, - N0.getOperand(0).getOperand(0), - DAG.getConstant(c1 + c2, DL, - ShiftCountVT))); + SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT); + SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT, + InnerShift.getOperand(0), NewShiftAmt); + return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift); + } + // In the more general case, we can clear the high bits after the shift: + // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask) + if (N0.hasOneUse() && InnerShift.hasOneUse() && + c1 + c2 < InnerShiftSize) { + SDLoc DL(N); + SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT); + SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT, + InnerShift.getOperand(0), NewShiftAmt); + SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize, + OpSizeInBits - c2), + DL, InnerShiftVT); + SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask); + return DAG.getNode(ISD::TRUNCATE, DL, VT, And); } } } @@ -8585,6 +8766,10 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return Chain; + // Try transforming N to an indexed store. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + return SDValue(); } @@ -8609,6 +8794,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return CombineTo(N, MLD->getPassThru(), MLD->getChain()); + // Try transforming N to an indexed load. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + return SDValue(); } @@ -9108,6 +9297,8 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT)) return SDValue(); + assert(!DstVT.isScalableVector() && "Unexpected scalable vector type"); + SDLoc DL(N); const unsigned NumSplits = DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements(); @@ -9125,8 +9316,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); - BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Stride, DL, BasePtr.getValueType())); + BasePtr = DAG.getMemBasePlusOffset(BasePtr, Stride, DL); Loads.push_back(SplitLoad.getValue(0)); Chains.push_back(SplitLoad.getValue(1)); @@ -9365,11 +9555,10 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, SDLoc dl(Ld); SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru()); - SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(), - Ld->getBasePtr(), Ld->getMask(), - PassThru, Ld->getMemoryVT(), - Ld->getMemOperand(), ExtLoadType, - Ld->isExpandingLoad()); + SDValue NewLoad = DAG.getMaskedLoad( + VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(), + PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(), + ExtLoadType, Ld->isExpandingLoad()); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1)); return NewLoad; } @@ -9397,10 +9586,15 @@ static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1) // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1) SDLoc DL(N); - SDValue NotX = DAG.getNOT(DL, X, VT); - SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT); - auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL; - return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount); + unsigned ShCt = VT.getSizeInBits() - 1; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) { + SDValue NotX = DAG.getNOT(DL, X, VT); + SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT); + auto ShiftOpcode = + N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL; + return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount); + } } return SDValue(); } @@ -9671,6 +9865,29 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, return (Known.Zero | 1).isAllOnesValue(); } +/// Given an extending node with a pop-count operand, if the target does not +/// support a pop-count in the narrow source type but does support it in the +/// destination type, widen the pop-count to the destination type. +static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) { + assert((Extend->getOpcode() == ISD::ZERO_EXTEND || + Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op"); + + SDValue CtPop = Extend->getOperand(0); + if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse()) + return SDValue(); + + EVT VT = Extend->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) || + !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT)) + return SDValue(); + + // zext (ctpop X) --> ctpop (zext X) + SDLoc DL(Extend); + SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT); + return DAG.getNode(ISD::CTPOP, DL, VT, NewZext); +} + SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -9921,6 +10138,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) return NewVSel; + if (SDValue NewCtPop = widenCtPop(N, DAG)) + return NewCtPop; + return SDValue(); } @@ -10067,6 +10287,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { return SCC; } + if (SDValue NewCtPop = widenCtPop(N, DAG)) + return NewCtPop; + return SDValue(); } @@ -10273,17 +10496,14 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if (DAG.getDataLayout().isBigEndian()) ShAmt = AdjustBigEndianShift(ShAmt); - EVT PtrType = N0.getOperand(1).getValueType(); uint64_t PtrOff = ShAmt / 8; unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); SDLoc DL(LN0); // The original load itself didn't wrap, so an offset within it doesn't. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); - SDValue NewPtr = DAG.getNode(ISD::ADD, DL, - PtrType, LN0->getBasePtr(), - DAG.getConstant(PtrOff, DL, PtrType), - Flags); + SDValue NewPtr = + DAG.getMemBasePlusOffset(LN0->getBasePtr(), PtrOff, DL, Flags); AddToWorklist(NewPtr.getNode()); SDValue Load; @@ -10735,16 +10955,16 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) { SDValue VecSrc = N0.getOperand(0); - EVT SrcVT = VecSrc.getValueType(); - if (SrcVT.isVector() && SrcVT.getScalarType() == VT && + EVT VecSrcVT = VecSrc.getValueType(); + if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT && (!LegalOperations || - TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) { + TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) { SDLoc SL(N); EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); - unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1; - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, - VecSrc, DAG.getConstant(Idx, SL, IdxVT)); + unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1; + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc, + DAG.getConstant(Idx, SL, IdxVT)); } } @@ -11299,11 +11519,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N)); // Floating-point multiply-add without intermediate rounding. bool HasFMA = - TLI.isFMAFasterThanFMulAndFAdd(VT) && + TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // No valid opcode, do not combine. @@ -11359,7 +11579,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); if (isContractableFMUL(N00) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N00.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)), @@ -11373,7 +11594,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (N1.getOpcode() == ISD::FP_EXTEND) { SDValue N10 = N1.getOperand(0); if (isContractableFMUL(N10) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N10.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)), @@ -11427,7 +11649,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (N02.getOpcode() == ISD::FP_EXTEND) { SDValue N020 = N02.getOperand(0); if (isContractableFMUL(N020) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N020.getValueType())) { return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), N020.getOperand(0), N020.getOperand(1), N1, Flags); @@ -11456,7 +11679,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (N00.getOpcode() == PreferredFusedOpcode) { SDValue N002 = N00.getOperand(2); if (isContractableFMUL(N002) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N00.getValueType())) { return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), N002.getOperand(0), N002.getOperand(1), N1, Flags); @@ -11471,7 +11695,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (N12.getOpcode() == ISD::FP_EXTEND) { SDValue N120 = N12.getOperand(0); if (isContractableFMUL(N120) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N120.getValueType())) { return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), N120.getOperand(0), N120.getOperand(1), N0, Flags); @@ -11489,7 +11714,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (N10.getOpcode() == PreferredFusedOpcode) { SDValue N102 = N10.getOperand(2); if (isContractableFMUL(N102) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N10.getValueType())) { return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), N102.getOperand(0), N102.getOperand(1), N0, Flags); @@ -11510,11 +11736,11 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N)); // Floating-point multiply-add without intermediate rounding. bool HasFMA = - TLI.isFMAFasterThanFMulAndFAdd(VT) && + TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // No valid opcode, do not combine. @@ -11579,7 +11805,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); if (isContractableFMUL(N00) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N00.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)), @@ -11595,7 +11822,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (N1.getOpcode() == ISD::FP_EXTEND) { SDValue N10 = N1.getOperand(0); if (isContractableFMUL(N10) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N10.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, @@ -11617,7 +11845,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (N00.getOpcode() == ISD::FNEG) { SDValue N000 = N00.getOperand(0); if (isContractableFMUL(N000) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N00.getValueType())) { return DAG.getNode(ISD::FNEG, SL, VT, DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, @@ -11640,7 +11869,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (N00.getOpcode() == ISD::FP_EXTEND) { SDValue N000 = N00.getOperand(0); if (isContractableFMUL(N000) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N000.getValueType())) { return DAG.getNode(ISD::FNEG, SL, VT, DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, @@ -11671,7 +11901,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub x, (fma y, z, (fmul u, v))) // -> (fma (fneg y), z, (fma (fneg u), v, x)) if (CanFuse && N1.getOpcode() == PreferredFusedOpcode && - isContractableFMUL(N1.getOperand(2))) { + isContractableFMUL(N1.getOperand(2)) && + N1->hasOneUse()) { SDValue N20 = N1.getOperand(2).getOperand(0); SDValue N21 = N1.getOperand(2).getOperand(1); return DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -11686,12 +11917,14 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) - if (N0.getOpcode() == PreferredFusedOpcode) { + if (N0.getOpcode() == PreferredFusedOpcode && + N0->hasOneUse()) { SDValue N02 = N0.getOperand(2); if (N02.getOpcode() == ISD::FP_EXTEND) { SDValue N020 = N02.getOperand(0); if (isContractableFMUL(N020) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N020.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -11716,7 +11949,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (N00.getOpcode() == PreferredFusedOpcode) { SDValue N002 = N00.getOperand(2); if (isContractableFMUL(N002) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N00.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)), @@ -11736,10 +11970,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) if (N1.getOpcode() == PreferredFusedOpcode && - N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) { + N1.getOperand(2).getOpcode() == ISD::FP_EXTEND && + N1->hasOneUse()) { SDValue N120 = N1.getOperand(2).getOperand(0); if (isContractableFMUL(N120) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N120.getValueType())) { SDValue N1200 = N120.getOperand(0); SDValue N1201 = N120.getOperand(1); return DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -11768,7 +12004,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { SDValue N101 = CvtSrc.getOperand(1); SDValue N102 = CvtSrc.getOperand(2); if (isContractableFMUL(N102) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + CvtSrc.getValueType())) { SDValue N1020 = N102.getOperand(0); SDValue N1021 = N102.getOperand(1); return DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -11812,7 +12049,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { // Floating-point multiply-add without intermediate rounding. bool HasFMA = (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && - TLI.isFMAFasterThanFMulAndFAdd(VT) && + TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // Floating-point multiply-add with intermediate rounding. This can result @@ -12402,6 +12639,15 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } } + // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z)) + // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z)) + if (!TLI.isFNegFree(VT) && + TLI.isNegatibleForFree(SDValue(N, 0), DAG, LegalOperations, + ForCodeSize) == 2) + return DAG.getNode(ISD::FNEG, DL, VT, + TLI.getNegatedExpression(SDValue(N, 0), DAG, + LegalOperations, ForCodeSize), + Flags); return SDValue(); } @@ -12738,7 +12984,7 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) { // Assume that libcalls are the smallest code. // TODO: This restriction should probably be lifted for vectors. - if (DAG.getMachineFunction().getFunction().hasOptSize()) + if (ForCodeSize) return SDValue(); // pow(X, 0.25) --> sqrt(sqrt(X)) @@ -13135,6 +13381,16 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize)) return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 FIXME: This is + // duplicated in isNegatibleForFree, but isNegatibleForFree doesn't know it + // was called from a context with a nsz flag if the input fsub does not. + if (N0.getOpcode() == ISD::FSUB && + (DAG.getTarget().Options.NoSignedZerosFPMath || + N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) { + return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1), + N0.getOperand(0), N->getFlags()); + } + // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading // constant pool values. if (!TLI.isFNegFree(VT) && @@ -13168,9 +13424,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (CFP1) { APFloat CVal = CFP1->getValueAPF(); CVal.changeSign(); - if (Level >= AfterLegalizeDAG && - (TLI.isFPImmLegal(CVal, VT, ForCodeSize) || - TLI.isOperationLegal(ISD::ConstantFP, VT))) + if (LegalDAG && (TLI.isFPImmLegal(CVal, VT, ForCodeSize) || + TLI.isOperationLegal(ISD::ConstantFP, VT))) return DAG.getNode( ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)), @@ -13423,12 +13678,22 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, EVT VT; unsigned AS; - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { if (LD->isIndexed() || LD->getBasePtr().getNode() != N) return false; VT = LD->getMemoryVT(); AS = LD->getAddressSpace(); - } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { + if (ST->isIndexed() || ST->getBasePtr().getNode() != N) + return false; + VT = ST->getMemoryVT(); + AS = ST->getAddressSpace(); + } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) { + if (LD->isIndexed() || LD->getBasePtr().getNode() != N) + return false; + VT = LD->getMemoryVT(); + AS = LD->getAddressSpace(); + } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) { if (ST->isIndexed() || ST->getBasePtr().getNode() != N) return false; VT = ST->getMemoryVT(); @@ -13462,38 +13727,64 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, VT.getTypeForEVT(*DAG.getContext()), AS); } -/// Try turning a load/store into a pre-indexed load/store when the base -/// pointer is an add or subtract and it has other uses besides the load/store. -/// After the transformation, the new indexed load/store has effectively folded -/// the add/subtract in and all of its other uses are redirected to the -/// new load/store. -bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { - if (Level < AfterLegalizeDAG) - return false; - - bool isLoad = true; - SDValue Ptr; - EVT VT; - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { +static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, + bool &IsLoad, bool &IsMasked, SDValue &Ptr, + const TargetLowering &TLI) { + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { if (LD->isIndexed()) return false; - VT = LD->getMemoryVT(); - if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) && - !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT)) + EVT VT = LD->getMemoryVT(); + if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT)) return false; Ptr = LD->getBasePtr(); - } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { if (ST->isIndexed()) return false; - VT = ST->getMemoryVT(); - if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) && - !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT)) + EVT VT = ST->getMemoryVT(); + if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT)) return false; Ptr = ST->getBasePtr(); - isLoad = false; + IsLoad = false; + } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) { + if (LD->isIndexed()) + return false; + EVT VT = LD->getMemoryVT(); + if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) && + !TLI.isIndexedMaskedLoadLegal(Dec, VT)) + return false; + Ptr = LD->getBasePtr(); + IsMasked = true; + } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) { + if (ST->isIndexed()) + return false; + EVT VT = ST->getMemoryVT(); + if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) && + !TLI.isIndexedMaskedStoreLegal(Dec, VT)) + return false; + Ptr = ST->getBasePtr(); + IsLoad = false; + IsMasked = true; } else { return false; } + return true; +} + +/// Try turning a load/store into a pre-indexed load/store when the base +/// pointer is an add or subtract and it has other uses besides the load/store. +/// After the transformation, the new indexed load/store has effectively folded +/// the add/subtract in and all of its other uses are redirected to the +/// new load/store. +bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { + if (Level < AfterLegalizeDAG) + return false; + + bool IsLoad = true; + bool IsMasked = false; + SDValue Ptr; + if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked, + Ptr, TLI)) + return false; // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail // out. There is no reason to make this a preinc/predec. @@ -13535,8 +13826,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { return false; // Check #2. - if (!isLoad) { - SDValue Val = cast<StoreSDNode>(N)->getValue(); + if (!IsLoad) { + SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue() + : cast<StoreSDNode>(N)->getValue(); // Would require a copy. if (Val == BasePtr) @@ -13612,18 +13904,26 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { return false; SDValue Result; - if (isLoad) - Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), - BasePtr, Offset, AM); - else - Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N), - BasePtr, Offset, AM); + if (!IsMasked) { + if (IsLoad) + Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM); + else + Result = + DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM); + } else { + if (IsLoad) + Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr, + Offset, AM); + else + Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr, + Offset, AM); + } ++PreIndexedNodes; ++NodesCombined; LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); - if (isLoad) { + if (IsLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); } else { @@ -13677,7 +13977,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // We can now generate the new expression. SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0)); - SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0); + SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0); SDValue NewUse = DAG.getNode(Opcode, DL, @@ -13687,7 +13987,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { } // Replace the uses of Ptr with uses of the updated base value. - DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0)); + DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0)); deleteAndRecombine(Ptr.getNode()); AddToWorklist(Result.getNode()); @@ -13702,29 +14002,12 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { if (Level < AfterLegalizeDAG) return false; - bool isLoad = true; + bool IsLoad = true; + bool IsMasked = false; SDValue Ptr; - EVT VT; - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { - if (LD->isIndexed()) - return false; - VT = LD->getMemoryVT(); - if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) && - !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT)) - return false; - Ptr = LD->getBasePtr(); - } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { - if (ST->isIndexed()) - return false; - VT = ST->getMemoryVT(); - if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) && - !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT)) - return false; - Ptr = ST->getBasePtr(); - isLoad = false; - } else { + if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad, IsMasked, + Ptr, TLI)) return false; - } if (Ptr.getNode()->hasOneUse()) return false; @@ -13760,7 +14043,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // If all the uses are load / store addresses, then don't do the // transformation. - if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){ + if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) { bool RealUse = false; for (SDNode *UseUse : Use->uses()) { if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) @@ -13786,18 +14069,24 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { Worklist.push_back(Op); if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) && !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) { - SDValue Result = isLoad - ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), - BasePtr, Offset, AM) - : DAG.getIndexedStore(SDValue(N,0), SDLoc(N), - BasePtr, Offset, AM); + SDValue Result; + if (!IsMasked) + Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, + Offset, AM) + : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), + BasePtr, Offset, AM); + else + Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), + BasePtr, Offset, AM) + : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), + BasePtr, Offset, AM); ++PostIndexedNodes; ++NodesCombined; LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); - if (isLoad) { + if (IsLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); } else { @@ -13809,7 +14098,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // Replace the uses of Use with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), - Result.getValue(isLoad ? 1 : 0)); + Result.getValue(IsLoad ? 1 : 0)); deleteAndRecombine(Op); return true; } @@ -13923,8 +14212,8 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { // the stored value). With Offset=n (for n > 0) the loaded value starts at the // n:th least significant byte of the stored value. if (DAG.getDataLayout().isBigEndian()) - Offset = (STMemType.getStoreSizeInBits() - - LDMemType.getStoreSizeInBits()) / 8 - Offset; + Offset = ((int64_t)STMemType.getStoreSizeInBits() - + (int64_t)LDMemType.getStoreSizeInBits()) / 8 - Offset; // Check that the stored value cover all bits that are loaded. bool STCoversLD = @@ -14066,7 +14355,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { return V; // Try to infer better alignment information than the load already has. - if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { + if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) { SDValue NewLoad = DAG.getExtLoad( @@ -14786,8 +15075,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, SDValue Ptr = St->getBasePtr(); if (StOffset) { SDLoc DL(IVal); - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), - Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, StOffset, DL); NewAlign = MinAlign(NewAlign, StOffset); } @@ -14898,10 +15186,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy)) return SDValue(); - SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), - Ptr.getValueType(), Ptr, - DAG.getConstant(PtrOff, SDLoc(LD), - Ptr.getValueType())); + SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, PtrOff, SDLoc(LD)); SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), NewAlign, @@ -15081,7 +15366,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // The latest Node in the DAG. SDLoc DL(StoreNodes[0].MemNode); - int64_t ElementSizeBits = MemVT.getStoreSizeInBits(); + TypeSize ElementSizeBits = MemVT.getStoreSizeInBits(); unsigned SizeInBits = NumStores * ElementSizeBits; unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; @@ -15466,7 +15751,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { Attribute::NoImplicitFloat); // This function cannot currently deal with non-byte-sized memory sizes. - if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) + if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits()) return false; if (!MemVT.isSimple()) @@ -16015,6 +16300,9 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { if (Value.getOpcode() == ISD::TargetConstantFP) return SDValue(); + if (!ISD::isNormalStore(ST)) + return SDValue(); + SDLoc DL(ST); SDValue Chain = ST->getChain(); @@ -16075,8 +16363,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), ST->getAlignment(), MMOFlags, AAInfo); - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(4, DL, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, 4, DL); Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), @@ -16111,8 +16398,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT, DAG, *ST->getMemOperand())) { return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, - ST->getPointerInfo(), ST->getAlignment(), - ST->getMemOperand()->getFlags(), ST->getAAInfo()); + ST->getMemOperand()); } } @@ -16121,7 +16407,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return Chain; // Try to infer better alignment information than the store already has. - if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { + if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) { SDValue NewStore = @@ -16451,9 +16737,7 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { // Lower value store. SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), ST->getAlignment(), MMOFlags, AAInfo); - Ptr = - DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, HalfValBitSize / 8, DL); // Higher value store. SDValue St1 = DAG.getStore(St0, DL, Hi, Ptr, @@ -16464,11 +16748,15 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { /// Convert a disguised subvector insertion into a shuffle: SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { + assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && + "Expected extract_vector_elt"); SDValue InsertVal = N->getOperand(1); SDValue Vec = N->getOperand(0); - // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex) - // --> (vector_shuffle X, Y) + // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), + // InsIndex) + // --> (vector_shuffle X, Y) and variations where shuffle operands may be + // CONCAT_VECTORS. if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() && InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && isa<ConstantSDNode>(InsertVal.getOperand(1))) { @@ -16481,18 +16769,47 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { // Vec's operand 0 is using indices from 0 to N-1 and // operand 1 from N to 2N - 1, where N is the number of // elements in the vectors. - int XOffset = -1; - if (InsertVal.getOperand(0) == X) { - XOffset = 0; - } else if (InsertVal.getOperand(0) == Y) { - XOffset = X.getValueType().getVectorNumElements(); + SDValue InsertVal0 = InsertVal.getOperand(0); + int ElementOffset = -1; + + // We explore the inputs of the shuffle in order to see if we find the + // source of the extract_vector_elt. If so, we can use it to modify the + // shuffle rather than perform an insert_vector_elt. + SmallVector<std::pair<int, SDValue>, 8> ArgWorkList; + ArgWorkList.emplace_back(Mask.size(), Y); + ArgWorkList.emplace_back(0, X); + + while (!ArgWorkList.empty()) { + int ArgOffset; + SDValue ArgVal; + std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val(); + + if (ArgVal == InsertVal0) { + ElementOffset = ArgOffset; + break; + } + + // Peek through concat_vector. + if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) { + int CurrentArgOffset = + ArgOffset + ArgVal.getValueType().getVectorNumElements(); + int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements(); + for (SDValue Op : reverse(ArgVal->ops())) { + CurrentArgOffset -= Step; + ArgWorkList.emplace_back(CurrentArgOffset, Op); + } + + // Make sure we went through all the elements and did not screw up index + // computation. + assert(CurrentArgOffset == ArgOffset); + } } - if (XOffset != -1) { + if (ElementOffset != -1) { SmallVector<int, 16> NewMask(Mask.begin(), Mask.end()); auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1)); - NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue(); + NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue(); assert(NewMask[InsIndex] < (int)(2 * Vec.getValueType().getVectorNumElements()) && NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound"); @@ -16562,13 +16879,14 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue EltNo = N->getOperand(2); SDLoc DL(N); - // If the inserted element is an UNDEF, just use the input vector. - if (InVal.isUndef()) - return InVec; - EVT VT = InVec.getValueType(); unsigned NumElts = VT.getVectorNumElements(); + // Insert into out-of-bounds element is undefined. + if (auto *IndexC = dyn_cast<ConstantSDNode>(EltNo)) + if (IndexC->getZExtValue() >= VT.getVectorNumElements()) + return DAG.getUNDEF(VT); + // Remove redundant insertions: // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && @@ -16683,7 +17001,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, // operand can't represent this new access since the offset is variable. MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace()); } - NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset); + NewPtr = DAG.getMemBasePlusOffset(NewPtr, Offset, DL); // The replacement we need to do here is a little tricky: we need to // replace an extractelement of a load with a load. @@ -16723,8 +17041,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, AddToWorklist(EVE); // Since we're explicitly calling ReplaceAllUses, add the new node to the // worklist explicitly as well. - AddUsersToWorklist(Load.getNode()); // Add users too - AddToWorklist(Load.getNode()); + AddToWorklistWithUsers(Load.getNode()); ++OpsNarrowed; return SDValue(EVE, 0); } @@ -18239,22 +18556,61 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { return DAG.getBitcast(NVT, NewExtract); } } - // TODO - handle (DestNumElts % SrcNumElts) == 0 + if ((DestNumElts % SrcNumElts) == 0) { + unsigned DestSrcRatio = DestNumElts / SrcNumElts; + if ((NVT.getVectorNumElements() % DestSrcRatio) == 0) { + unsigned NewExtNumElts = NVT.getVectorNumElements() / DestSrcRatio; + EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), + SrcVT.getScalarType(), NewExtNumElts); + if ((N->getConstantOperandVal(1) % DestSrcRatio) == 0 && + TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) { + unsigned IndexValScaled = N->getConstantOperandVal(1) / DestSrcRatio; + SDLoc DL(N); + SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL); + SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT, + V.getOperand(0), NewIndex); + return DAG.getBitcast(NVT, NewExtract); + } + } + } } - // Combine: - // (extract_subvec (concat V1, V2, ...), i) - // Into: - // Vi if possible - // Only operand 0 is checked as 'concat' assumes all inputs of the same - // type. - if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) && - V.getOperand(0).getValueType() == NVT) { - unsigned Idx = N->getConstantOperandVal(1); - unsigned NumElems = NVT.getVectorNumElements(); - assert((Idx % NumElems) == 0 && - "IDX in concat is not a multiple of the result vector length."); - return V->getOperand(Idx / NumElems); + if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index)) { + EVT ConcatSrcVT = V.getOperand(0).getValueType(); + assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() && + "Concat and extract subvector do not change element type"); + + unsigned ExtIdx = N->getConstantOperandVal(1); + unsigned ExtNumElts = NVT.getVectorNumElements(); + assert(ExtIdx % ExtNumElts == 0 && + "Extract index is not a multiple of the input vector length."); + + unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorNumElements(); + unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts; + + // If the concatenated source types match this extract, it's a direct + // simplification: + // extract_subvec (concat V1, V2, ...), i --> Vi + if (ConcatSrcNumElts == ExtNumElts) + return V.getOperand(ConcatOpIdx); + + // If the concatenated source vectors are a multiple length of this extract, + // then extract a fraction of one of those source vectors directly from a + // concat operand. Example: + // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 --> + // v2i8 extract_subvec v8i8 Y, 6 + if (ConcatSrcNumElts % ExtNumElts == 0) { + SDLoc DL(N); + unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts; + assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts && + "Trying to extract from >1 concat operand?"); + assert(NewExtIdx % ExtNumElts == 0 && + "Extract index is not a multiple of the input vector length."); + MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + SDValue NewIndexC = DAG.getConstant(NewExtIdx, DL, IdxTy); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, + V.getOperand(ConcatOpIdx), NewIndexC); + } } V = peekThroughBitcasts(V); @@ -18962,6 +19318,30 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return V; } + // A shuffle of a concat of the same narrow vector can be reduced to use + // only low-half elements of a concat with undef: + // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask' + if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() && + N0.getNumOperands() == 2 && + N0.getOperand(0) == N0.getOperand(1)) { + int HalfNumElts = (int)NumElts / 2; + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= HalfNumElts) { + assert(Idx < (int)NumElts && "Shuffle mask chooses undef op"); + Idx -= HalfNumElts; + } + NewMask.push_back(Idx); + } + if (TLI.isShuffleMaskLegal(NewMask, VT)) { + SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType()); + SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, + N0.getOperand(0), UndefVec); + return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask); + } + } + // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) @@ -19446,8 +19826,10 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { int EltIdx = i / Split; int SubIdx = i % Split; SDValue Elt = RHS.getOperand(EltIdx); + // X & undef --> 0 (not undef). So this lane must be converted to choose + // from the zero constant vector (same as if the element had all 0-bits). if (Elt.isUndef()) { - Indices.push_back(-1); + Indices.push_back(i + NumSubElts); continue; } @@ -19460,14 +19842,10 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { return SDValue(); // Extract the sub element from the constant bit mask. - if (DAG.getDataLayout().isBigEndian()) { - Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits); - } else { - Bits.lshrInPlace(SubIdx * NumSubBits); - } - - if (Split > 1) - Bits = Bits.trunc(NumSubBits); + if (DAG.getDataLayout().isBigEndian()) + Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits); + else + Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits); if (Bits.isAllOnesValue()) Indices.push_back(i); @@ -19910,22 +20288,28 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1; - SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); - SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt); - AddToWorklist(Shift.getNode()); - - if (XType.bitsGT(AType)) { - Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); + if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) { + SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); + SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt); AddToWorklist(Shift.getNode()); - } - if (CC == ISD::SETGT) - Shift = DAG.getNOT(DL, Shift, AType); + if (XType.bitsGT(AType)) { + Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); + AddToWorklist(Shift.getNode()); + } - return DAG.getNode(ISD::AND, DL, AType, Shift, N2); + if (CC == ISD::SETGT) + Shift = DAG.getNOT(DL, Shift, AType); + + return DAG.getNode(ISD::AND, DL, AType, Shift, N2); + } } - SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy); + unsigned ShCt = XType.getSizeInBits() - 1; + if (TLI.shouldAvoidTransformToShift(XType, ShCt)) + return SDValue(); + + SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt); AddToWorklist(Shift.getNode()); @@ -20035,31 +20419,29 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, // when the condition can be materialized as an all-ones register. Any // single bit-test can be materialized as an all-ones register with // shift-left and shift-right-arith. - // TODO: The operation legality checks could be loosened to include "custom", - // but that may cause regressions for targets that do not have shift - // instructions. if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && - N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2) && - TLI.isOperationLegal(ISD::SHL, VT) && - TLI.isOperationLegal(ISD::SRA, VT)) { + N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { SDValue AndLHS = N0->getOperand(0); auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { // Shift the tested bit over the sign bit. const APInt &AndMask = ConstAndRHS->getAPIntValue(); - SDValue ShlAmt = - DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), - getShiftAmountTy(AndLHS.getValueType())); - SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); - - // Now arithmetic right shift it all the way over, so the result is either - // all-ones, or zero. - SDValue ShrAmt = - DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl), - getShiftAmountTy(Shl.getValueType())); - SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); - - return DAG.getNode(ISD::AND, DL, VT, Shr, N3); + unsigned ShCt = AndMask.getBitWidth() - 1; + if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) { + SDValue ShlAmt = + DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), + getShiftAmountTy(AndLHS.getValueType())); + SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); + + // Now arithmetic right shift it all the way over, so the result is + // either all-ones, or zero. + SDValue ShrAmt = + DAG.getConstant(ShCt, SDLoc(Shl), + getShiftAmountTy(Shl.getValueType())); + SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); + + return DAG.getNode(ISD::AND, DL, VT, Shr, N3); + } } } @@ -20073,7 +20455,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) { if (Swap) { - CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger()); + CC = ISD::getSetCCInverse(CC, CmpOpVT); std::swap(N2C, N3C); } @@ -20101,10 +20483,13 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, if (N2C->isOne()) return Temp; + unsigned ShCt = N2C->getAPIntValue().logBase2(); + if (TLI.shouldAvoidTransformToShift(VT, ShCt)) + return SDValue(); + // shl setcc result by log2 n2c return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, - DAG.getConstant(N2C->getAPIntValue().logBase2(), - SDLoc(Temp), + DAG.getConstant(ShCt, SDLoc(Temp), getShiftAmountTy(Temp.getValueType()))); } @@ -20237,7 +20622,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { /// Result = N X_i + X_i (N - N A X_i) SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags) { - if (Level >= AfterLegalizeDAG) + if (LegalDAG) return SDValue(); // TODO: Handle half and/or extended types? @@ -20376,7 +20761,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, /// Op can be zero. SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Reciprocal) { - if (Level >= AfterLegalizeDAG) + if (LegalDAG) return SDValue(); // TODO: Handle half and/or extended types? @@ -20411,9 +20796,8 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, SDLoc DL(Op); EVT CCVT = getSetCCResultType(VT); ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT; - const Function &F = DAG.getMachineFunction().getFunction(); - Attribute Denorms = F.getFnAttribute("denormal-fp-math"); - if (Denorms.getValueAsString().equals("ieee")) { + DenormalMode DenormMode = DAG.getDenormalMode(VT); + if (DenormMode == DenormalMode::IEEE) { // fabs(X) < SmallestNormal ? 0.0 : Est const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem); diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 6d7260d7aee5..2bec8613e79c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -410,8 +410,8 @@ unsigned FastISel::materializeConstant(const Value *V, MVT VT) { else if (isa<ConstantPointerNull>(V)) // Translate this as an integer zero so that it can be // local-CSE'd with actual integer zeros. - Reg = getRegForValue( - Constant::getNullValue(DL.getIntPtrType(V->getContext()))); + Reg = + getRegForValue(Constant::getNullValue(DL.getIntPtrType(V->getType()))); else if (const auto *CF = dyn_cast<ConstantFP>(V)) { if (CF->isNullValue()) Reg = fastMaterializeFloatZero(CF); @@ -1190,6 +1190,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { Flags.setSwiftSelf(); if (Arg.IsSwiftError) Flags.setSwiftError(); + if (Arg.IsCFGuardTarget) + Flags.setCFGuardTarget(); if (Arg.IsByVal) Flags.setByVal(); if (Arg.IsInAlloca) { @@ -1236,10 +1238,9 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs); // Set labels for heapallocsite call. - if (CLI.CS && CLI.CS->getInstruction()->hasMetadata("heapallocsite")) { - const MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite"); - MF->addCodeViewHeapAllocSite(CLI.Call, MD); - } + if (CLI.CS) + if (MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite")) + CLI.Call->setHeapAllocMarker(*MF, MD); return true; } @@ -1275,6 +1276,10 @@ bool FastISel::lowerCall(const CallInst *CI) { bool IsTailCall = CI->isTailCall(); if (IsTailCall && !isInTailCallPosition(CS, TM)) IsTailCall = false; + if (IsTailCall && MF->getFunction() + .getFnAttribute("disable-tail-calls") + .getValueAsString() == "true") + IsTailCall = false; CallLoweringInfo CLI; CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS) @@ -1926,7 +1931,8 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo, TII(*MF->getSubtarget().getInstrInfo()), TLI(*MF->getSubtarget().getTargetLowering()), TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo), - SkipTargetIndependentISel(SkipTargetIndependentISel) {} + SkipTargetIndependentISel(SkipTargetIndependentISel), + LastLocalValue(nullptr), EmitStartPt(nullptr) {} FastISel::~FastISel() = default; diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index cf6711adad48..fa33400cd4b3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -144,7 +144,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, if (AI->isStaticAlloca() && (TFI->isStackRealignable() || (Align <= StackAlign))) { const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize()); - uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty); + uint64_t TySize = + MF->getDataLayout().getTypeAllocSize(Ty).getKnownMinSize(); TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. @@ -159,6 +160,12 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, MF->getFrameInfo().CreateStackObject(TySize, Align, false, AI); } + // Scalable vectors may need a special StackID to distinguish + // them from other (fixed size) stack objects. + if (Ty->isVectorTy() && Ty->getVectorIsScalable()) + MF->getFrameInfo().setStackID(FrameIndex, + TFI->getStackIDForScalableVectors()); + StaticAllocaMap[AI] = FrameIndex; // Update the catch handler information. if (Iter != CatchObjects.end()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index c5095995ec2e..c613c2540628 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -882,8 +882,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, if (Flags.hasExact()) MI->setFlag(MachineInstr::MIFlag::IsExact); - if (Flags.hasFPExcept()) - MI->setFlag(MachineInstr::MIFlag::FPExcept); + if (Flags.hasNoFPExcept()) + MI->setFlag(MachineInstr::MIFlag::NoFPExcept); } // Emit all of the actual operands of this instruction, adding them to the diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f9fdf525240f..80ac8b95e4ef 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -134,26 +134,27 @@ private: ArrayRef<int> Mask) const; bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, - bool &NeedInvert, const SDLoc &dl); + bool &NeedInvert, const SDLoc &dl, SDValue &Chain, + bool IsSignaling = false); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); - std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, - SDNode *Node, bool isSigned); - SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, - RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, - RTLIB::Libcall Call_F128, - RTLIB::Libcall Call_PPCF128); + void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128, + SmallVectorImpl<SDValue> &Results); SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8, RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128); - SDValue ExpandArgFPLibCall(SDNode *Node, - RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, - RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, - RTLIB::Libcall Call_PPCF128); + void ExpandArgFPLibCall(SDNode *Node, + RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128, + SmallVectorImpl<SDValue> &Results); void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); @@ -172,12 +173,11 @@ private: SDValue NewIntValue) const; SDValue ExpandFCOPYSIGN(SDNode *Node) const; SDValue ExpandFABS(SDNode *Node) const; - SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, - const SDLoc &dl); - SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, - const SDLoc &dl); - SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, - const SDLoc &dl); + SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain); + void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl, + SmallVectorImpl<SDValue> &Results); + void PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl, + SmallVectorImpl<SDValue> &Results); SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl); SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl); @@ -421,6 +421,9 @@ SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, } SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { + if (!ISD::isNormalStore(ST)) + return SDValue(); + LLVM_DEBUG(dbgs() << "Optimizing float store operations\n"); // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' // FIXME: We shouldn't do this for TargetConstantFP's. @@ -466,8 +469,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), Alignment, MMOFlags, AAInfo); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(4, dl, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, 4, dl); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), MinAlign(Alignment, 4U), MMOFlags, AAInfo); @@ -577,9 +579,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, - Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Hi = DAG.getNode( ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, dl, @@ -793,9 +793,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, - Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, @@ -824,9 +822,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, - Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, @@ -1013,6 +1009,18 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); break; + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + // These pseudo-ops are the same as the other STRICT_ ops except + // they are registered with setOperationAction() using the input type + // instead of the output type. + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(1).getValueType()); + break; case ISD::SIGN_EXTEND_INREG: { EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT(); Action = TLI.getOperationAction(Node->getOpcode(), InnerType); @@ -1023,11 +1031,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Node->getOperand(2).getValueType()); break; case ISD::SELECT_CC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: case ISD::SETCC: case ISD::BR_CC: { unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : + Node->getOpcode() == ISD::STRICT_FSETCC ? 3 : + Node->getOpcode() == ISD::STRICT_FSETCCS ? 3 : Node->getOpcode() == ISD::SETCC ? 2 : 1; - unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; + unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : + Node->getOpcode() == ISD::STRICT_FSETCC ? 1 : + Node->getOpcode() == ISD::STRICT_FSETCCS ? 1 : 0; MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get(); @@ -1105,16 +1119,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; } break; - case ISD::STRICT_LRINT: - case ISD::STRICT_LLRINT: - case ISD::STRICT_LROUND: - case ISD::STRICT_LLROUND: - // These pseudo-ops are the same as the other STRICT_ ops except - // they are registered with setOperationAction() using the input type - // instead of the output type. - Action = TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getOperand(1).getValueType()); - break; case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: @@ -1125,7 +1129,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: - case ISD::UMULFIXSAT: { + case ISD::UMULFIXSAT: + case ISD::SDIVFIX: + case ISD::UDIVFIX: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); @@ -1408,7 +1414,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { unsigned Offset = TypeByteSize*i; SDValue Idx = DAG.getConstant(Offset, dl, FIPtr.getValueType()); - Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); + Idx = DAG.getMemBasePlusOffset(FIPtr, Idx, dl); // If the destination vector element type is narrower than the source // element type, only store the bits necessary. @@ -1471,8 +1477,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, } else { // Advance the pointer so that the loaded byte will contain the sign bit. unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1; - IntPtr = DAG.getNode(ISD::ADD, DL, StackPtr.getValueType(), StackPtr, - DAG.getConstant(ByteOffset, DL, StackPtr.getValueType())); + IntPtr = DAG.getMemBasePlusOffset(StackPtr, ByteOffset, DL); State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI, ByteOffset); } @@ -1629,10 +1634,9 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, /// of a true/false result. /// /// \returns true if the SetCC has been legalized, false if it hasn't. -bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, - SDValue &RHS, SDValue &CC, - bool &NeedInvert, - const SDLoc &dl) { +bool SelectionDAGLegalize::LegalizeSetCCCondCode( + EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, bool &NeedInvert, + const SDLoc &dl, SDValue &Chain, bool IsSignaling) { MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); NeedInvert = false; @@ -1650,7 +1654,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, } // Swapping operands didn't work. Try inverting the condition. bool NeedSwap = false; - InvCC = getSetCCInverse(CCCode, OpVT.isInteger()); + InvCC = getSetCCInverse(CCCode, OpVT); if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { // If inverting the condition is not enough, try swapping operands // on top of it. @@ -1715,13 +1719,16 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { // If we aren't the ordered or unorder operation, // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS). - SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1); - SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2); + SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling); } else { // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS) - SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1); - SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2); + SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling); } + if (Chain) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1), + SetCC2.getValue(1)); LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); RHS = SDValue(); CC = SDValue(); @@ -2077,52 +2084,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, return CallInfo.first; } -// Expand a node into a call to a libcall. Similar to -// ExpandLibCall except that the first operand is the in-chain. -std::pair<SDValue, SDValue> -SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, - SDNode *Node, - bool isSigned) { - SDValue InChain = Node->getOperand(0); - - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { - EVT ArgVT = Node->getOperand(i).getValueType(); - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - Entry.Node = Node->getOperand(i); - Entry.Ty = ArgTy; - Entry.IsSExt = isSigned; - Entry.IsZExt = !isSigned; - Args.push_back(Entry); - } - SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy(DAG.getDataLayout())); - - Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(SDLoc(Node)) - .setChain(InChain) - .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, - std::move(Args)) - .setSExtResult(isSigned) - .setZExtResult(!isSigned); - - std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); - - return CallInfo; -} - -SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, - RTLIB::Libcall Call_F32, - RTLIB::Libcall Call_F64, - RTLIB::Libcall Call_F80, - RTLIB::Libcall Call_F128, - RTLIB::Libcall Call_PPCF128) { - if (Node->isStrictFPOpcode()) - Node = DAG.mutateStrictFPToFP(Node); - +void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128, + SmallVectorImpl<SDValue> &Results) { RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); @@ -2132,7 +2100,22 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, case MVT::f128: LC = Call_F128; break; case MVT::ppcf128: LC = Call_PPCF128; break; } - return ExpandLibCall(LC, Node, false); + + if (Node->isStrictFPOpcode()) { + EVT RetVT = Node->getValueType(0); + SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end()); + TargetLowering::MakeLibCallOptions CallOptions; + // FIXME: This doesn't support tail calls. + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT, + Ops, CallOptions, + SDLoc(Node), + Node->getOperand(0)); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + } else { + SDValue Tmp = ExpandLibCall(LC, Node, false); + Results.push_back(Tmp); + } } SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, @@ -2155,17 +2138,17 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, /// Expand the node to a libcall based on first argument type (for instance /// lround and its variant). -SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node, - RTLIB::Libcall Call_F32, - RTLIB::Libcall Call_F64, - RTLIB::Libcall Call_F80, - RTLIB::Libcall Call_F128, - RTLIB::Libcall Call_PPCF128) { - if (Node->isStrictFPOpcode()) - Node = DAG.mutateStrictFPToFP(Node); +void SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128, + SmallVectorImpl<SDValue> &Results) { + EVT InVT = Node->getOperand(Node->isStrictFPOpcode() ? 1 : 0).getValueType(); RTLIB::Libcall LC; - switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) { + switch (InVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; @@ -2174,7 +2157,21 @@ SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node, case MVT::ppcf128: LC = Call_PPCF128; break; } - return ExpandLibCall(LC, Node, false); + if (Node->isStrictFPOpcode()) { + EVT RetVT = Node->getValueType(0); + SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end()); + TargetLowering::MakeLibCallOptions CallOptions; + // FIXME: This doesn't support tail calls. + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT, + Ops, CallOptions, + SDLoc(Node), + Node->getOperand(0)); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + } else { + SDValue Tmp = ExpandLibCall(LC, Node, false); + Results.push_back(Tmp); + } } /// Issue libcalls to __{u}divmod to compute div / rem pairs. @@ -2344,9 +2341,14 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, /// INT_TO_FP operation of the specified operand when the target requests that /// we expand it. At this point, we know that the result and operand types are /// legal for the target. -SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, - EVT DestVT, - const SDLoc &dl) { +SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node, + SDValue &Chain) { + bool isSigned = (Node->getOpcode() == ISD::STRICT_SINT_TO_FP || + Node->getOpcode() == ISD::SINT_TO_FP); + EVT DestVT = Node->getValueType(0); + SDLoc dl(Node); + unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; + SDValue Op0 = Node->getOperand(OpNo); EVT SrcVT = Op0.getValueType(); // TODO: Should any fast-math-flags be set for the created nodes? @@ -2393,16 +2395,39 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, BitsToDouble(0x4330000080000000ULL) : BitsToDouble(0x4330000000000000ULL), dl, MVT::f64); - // subtract the bias - SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias); - // final result - SDValue Result = DAG.getFPExtendOrRound(Sub, dl, DestVT); + // Subtract the bias and get the final result. + SDValue Sub; + SDValue Result; + if (Node->isStrictFPOpcode()) { + Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other}, + {Node->getOperand(0), Load, Bias}); + Chain = Sub.getValue(1); + if (DestVT != Sub.getValueType()) { + std::pair<SDValue, SDValue> ResultPair; + ResultPair = + DAG.getStrictFPExtendOrRound(Sub, Chain, dl, DestVT); + Result = ResultPair.first; + Chain = ResultPair.second; + } + else + Result = Sub; + } else { + Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias); + Result = DAG.getFPExtendOrRound(Sub, dl, DestVT); + } return Result; } assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); // Code below here assumes !isSigned without checking again. + // FIXME: This can produce slightly incorrect results. See details in + // FIXME: https://reviews.llvm.org/D69275 - SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); + SDValue Tmp1; + if (Node->isStrictFPOpcode()) { + Tmp1 = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DestVT, MVT::Other }, + { Node->getOperand(0), Op0 }); + } else + Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(SrcVT), Op0, DAG.getConstant(0, dl, SrcVT), ISD::SETLT); @@ -2448,6 +2473,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, FudgeInReg = Handle.getValue(); } + if (Node->isStrictFPOpcode()) { + SDValue Result = DAG.getNode(ISD::STRICT_FADD, dl, { DestVT, MVT::Other }, + { Tmp1.getValue(1), Tmp1, FudgeInReg }); + Chain = Result.getValue(1); + return Result; + } + return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); } @@ -2456,9 +2488,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, /// we promote it. At this point, we know that the result and operand types are /// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP /// operation that takes a larger input. -SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, - bool isSigned, - const SDLoc &dl) { +void SelectionDAGLegalize::PromoteLegalINT_TO_FP( + SDNode *N, const SDLoc &dl, SmallVectorImpl<SDValue> &Results) { + bool IsStrict = N->isStrictFPOpcode(); + bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP || + N->getOpcode() == ISD::STRICT_SINT_TO_FP; + EVT DestVT = N->getValueType(0); + SDValue LegalOp = N->getOperand(IsStrict ? 1 : 0); + unsigned UIntOp = IsStrict ? ISD::STRICT_UINT_TO_FP : ISD::UINT_TO_FP; + unsigned SIntOp = IsStrict ? ISD::STRICT_SINT_TO_FP : ISD::SINT_TO_FP; + // First step, figure out the appropriate *INT_TO_FP operation to use. EVT NewInTy = LegalOp.getValueType(); @@ -2470,15 +2509,16 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, assert(NewInTy.isInteger() && "Ran out of possibilities!"); // If the target supports SINT_TO_FP of this type, use it. - if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) { - OpToUse = ISD::SINT_TO_FP; + if (TLI.isOperationLegalOrCustom(SIntOp, NewInTy)) { + OpToUse = SIntOp; break; } - if (isSigned) continue; + if (IsSigned) + continue; // If the target supports UINT_TO_FP of this type, use it. - if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) { - OpToUse = ISD::UINT_TO_FP; + if (TLI.isOperationLegalOrCustom(UIntOp, NewInTy)) { + OpToUse = UIntOp; break; } @@ -2487,9 +2527,21 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, // Okay, we found the operation and type to use. Zero extend our input to the // desired type then run the operation on it. - return DAG.getNode(OpToUse, dl, DestVT, - DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, - dl, NewInTy, LegalOp)); + if (IsStrict) { + SDValue Res = + DAG.getNode(OpToUse, dl, {DestVT, MVT::Other}, + {N->getOperand(0), + DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, + dl, NewInTy, LegalOp)}); + Results.push_back(Res); + Results.push_back(Res.getValue(1)); + return; + } + + Results.push_back( + DAG.getNode(OpToUse, dl, DestVT, + DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, + dl, NewInTy, LegalOp))); } /// This function is responsible for legalizing a @@ -2497,9 +2549,13 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, /// we promote it. At this point, we know that the result and operand types are /// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT /// operation that returns a larger result. -SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, - bool isSigned, - const SDLoc &dl) { +void SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl, + SmallVectorImpl<SDValue> &Results) { + bool IsStrict = N->isStrictFPOpcode(); + bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT || + N->getOpcode() == ISD::STRICT_FP_TO_SINT; + EVT DestVT = N->getValueType(0); + SDValue LegalOp = N->getOperand(IsStrict ? 1 : 0); // First step, figure out the appropriate FP_TO*INT operation to use. EVT NewOutTy = DestVT; @@ -2512,26 +2568,32 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, // A larger signed type can hold all unsigned values of the requested type, // so using FP_TO_SINT is valid - if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) { - OpToUse = ISD::FP_TO_SINT; + OpToUse = IsStrict ? ISD::STRICT_FP_TO_SINT : ISD::FP_TO_SINT; + if (TLI.isOperationLegalOrCustom(OpToUse, NewOutTy)) break; - } // However, if the value may be < 0.0, we *must* use some FP_TO_SINT. - if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) { - OpToUse = ISD::FP_TO_UINT; + OpToUse = IsStrict ? ISD::STRICT_FP_TO_UINT : ISD::FP_TO_UINT; + if (!IsSigned && TLI.isOperationLegalOrCustom(OpToUse, NewOutTy)) break; - } // Otherwise, try a larger type. } // Okay, we found the operation and type to use. - SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp); + SDValue Operation; + if (IsStrict) { + SDVTList VTs = DAG.getVTList(NewOutTy, MVT::Other); + Operation = DAG.getNode(OpToUse, dl, VTs, N->getOperand(0), LegalOp); + } else + Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp); // Truncate the result of the extended FP_TO_*INT operation to the desired // size. - return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); + Results.push_back(Trunc); + if (IsStrict) + Results.push_back(Operation.getValue(1)); } /// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts. @@ -2812,12 +2874,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::STRICT_FP_ROUND: - // This expansion does not honor the "strict" properties anyway, - // so prefer falling back to the non-strict operation if legal. + // When strict mode is enforced we can't do expansion because it + // does not honor the "strict" properties. Only libcall is allowed. + if (TLI.isStrictFPEnabled()) + break; + // We might as well mutate to FP_ROUND when FP_ROUND operation is legal + // since this operation is more efficient than stack operation. if (TLI.getStrictFPOperationAction(Node->getOpcode(), Node->getValueType(0)) == TargetLowering::Legal) break; + // We fall back to use stack operation when the FP_ROUND operation + // isn't available. Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getValueType(0), Node->getValueType(0), dl, Node->getOperand(0)); @@ -2832,12 +2900,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; case ISD::STRICT_FP_EXTEND: - // This expansion does not honor the "strict" properties anyway, - // so prefer falling back to the non-strict operation if legal. + // When strict mode is enforced we can't do expansion because it + // does not honor the "strict" properties. Only libcall is allowed. + if (TLI.isStrictFPEnabled()) + break; + // We might as well mutate to FP_EXTEND when FP_EXTEND operation is legal + // since this operation is more efficient than stack operation. if (TLI.getStrictFPOperationAction(Node->getOpcode(), Node->getValueType(0)) == TargetLowering::Legal) break; + // We fall back to use stack operation when the FP_EXTEND operation + // isn't available. Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getOperand(1).getValueType(), Node->getValueType(0), dl, Node->getOperand(0)); @@ -2883,15 +2957,20 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::UINT_TO_FP: - if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) { + case ISD::STRICT_UINT_TO_FP: + if (TLI.expandUINT_TO_FP(Node, Tmp1, Tmp2, DAG)) { Results.push_back(Tmp1); + if (Node->isStrictFPOpcode()) + Results.push_back(Tmp2); break; } LLVM_FALLTHROUGH; case ISD::SINT_TO_FP: - Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP, - Node->getOperand(0), Node->getValueType(0), dl); + case ISD::STRICT_SINT_TO_FP: + Tmp1 = ExpandLegalINT_TO_FP(Node, Tmp2); Results.push_back(Tmp1); + if (Node->isStrictFPOpcode()) + Results.push_back(Tmp2); break; case ISD::FP_TO_SINT: if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) @@ -3340,6 +3419,24 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::UMULFIXSAT: Results.push_back(TLI.expandFixedPointMul(Node, DAG)); break; + case ISD::SDIVFIX: + case ISD::UDIVFIX: + if (SDValue V = TLI.expandFixedPointDiv(Node->getOpcode(), SDLoc(Node), + Node->getOperand(0), + Node->getOperand(1), + Node->getConstantOperandVal(2), + DAG)) { + Results.push_back(V); + break; + } + // FIXME: We might want to retry here with a wider type if we fail, if that + // type is legal. + // FIXME: Technically, so long as we only have sdivfixes where BW+Scale is + // <= 128 (which is the case for all of the default Embedded-C types), + // we will only get here with types and scales that we could always expand + // if we were allowed to generate libcalls to division functions of illegal + // type. But we cannot do that. + llvm_unreachable("Cannot expand DIVFIX!"); case ISD::ADDCARRY: case ISD::SUBCARRY: { SDValue LHS = Node->getOperand(0); @@ -3503,12 +3600,19 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } Results.push_back(Tmp1); break; - case ISD::SETCC: { - Tmp1 = Node->getOperand(0); - Tmp2 = Node->getOperand(1); - Tmp3 = Node->getOperand(2); - bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, - Tmp3, NeedInvert, dl); + case ISD::SETCC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: { + bool IsStrict = Node->getOpcode() != ISD::SETCC; + bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS; + SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue(); + unsigned Offset = IsStrict ? 1 : 0; + Tmp1 = Node->getOperand(0 + Offset); + Tmp2 = Node->getOperand(1 + Offset); + Tmp3 = Node->getOperand(2 + Offset); + bool Legalized = + LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, + NeedInvert, dl, Chain, IsSignaling); if (Legalized) { // If we expanded the SETCC by swapping LHS and RHS, or by inverting the @@ -3523,9 +3627,16 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0)); Results.push_back(Tmp1); + if (IsStrict) + Results.push_back(Chain); + break; } + // FIXME: It seems Legalized is false iff CCCode is Legal. I don't + // understand if this code is useful for strict nodes. + assert(!IsStrict && "Don't know how to expand for strict nodes."); + // Otherwise, SETCC for the given comparison type must be completely // illegal; expand it into a SELECT_CC. EVT VT = Node->getValueType(0); @@ -3548,11 +3659,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::SELECT_CC: { + // TODO: need to add STRICT_SELECT_CC and STRICT_SELECT_CCS Tmp1 = Node->getOperand(0); // LHS Tmp2 = Node->getOperand(1); // RHS Tmp3 = Node->getOperand(2); // True Tmp4 = Node->getOperand(3); // False EVT VT = Node->getValueType(0); + SDValue Chain; SDValue CC = Node->getOperand(4); ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get(); @@ -3574,8 +3687,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Try to legalize by inverting the condition. This is for targets that // might support an ordered version of a condition, but not the unordered // version (or vice versa). - ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp, - Tmp1.getValueType().isInteger()); + ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp, Tmp1.getValueType()); if (TLI.isCondCodeLegalOrCustom(InvCC, Tmp1.getSimpleValueType())) { // Use the new condition code and swap true and false Legalized = true; @@ -3595,9 +3707,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } if (!Legalized) { - Legalized = LegalizeSetCCCondCode( - getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert, - dl); + Legalized = LegalizeSetCCCondCode(getSetCCResultType(Tmp1.getValueType()), + Tmp1, Tmp2, CC, NeedInvert, dl, Chain); assert(Legalized && "Can't legalize SELECT_CC with legal condition!"); @@ -3623,13 +3734,16 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::BR_CC: { + // TODO: need to add STRICT_BR_CC and STRICT_BR_CCS + SDValue Chain; Tmp1 = Node->getOperand(0); // Chain Tmp2 = Node->getOperand(2); // LHS Tmp3 = Node->getOperand(3); // RHS Tmp4 = Node->getOperand(1); // CC - bool Legalized = LegalizeSetCCCondCode(getSetCCResultType( - Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl); + bool Legalized = + LegalizeSetCCCondCode(getSetCCResultType(Tmp2.getValueType()), Tmp2, + Tmp3, Tmp4, NeedInvert, dl, Chain); (void)Legalized; assert(Legalized && "Can't legalize BR_CC with legal condition!"); @@ -3677,7 +3791,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } SDValue Result = DAG.getBuildVector(Node->getValueType(0), dl, Scalars); - ReplaceNode(SDValue(Node, 0), Result); + Results.push_back(Result); break; } case ISD::VECREDUCE_FADD: @@ -3705,10 +3819,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: // FIXME: Custom lowering for these operations shouldn't return null! - break; + // Return true so that we don't call ConvertNodeToLibcall which also won't + // do anything. + return true; } - if (Results.empty() && Node->isStrictFPOpcode()) { + if (!TLI.isStrictFPEnabled() && Results.empty() && Node->isStrictFPOpcode()) { // FIXME: We were asked to expand a strict floating-point operation, // but there is currently no expansion implemented that would preserve // the "strict" properties. For now, we just fall back to the non-strict @@ -3793,7 +3909,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); - std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false); + EVT RetVT = Node->getValueType(0); + SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end()); + TargetLowering::MakeLibCallOptions CallOptions; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT, + Ops, CallOptions, + SDLoc(Node), + Node->getOperand(0)); Results.push_back(Tmp.first); Results.push_back(Tmp.second); break; @@ -3815,38 +3937,38 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { } case ISD::FMINNUM: case ISD::STRICT_FMINNUM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, - RTLIB::FMIN_F80, RTLIB::FMIN_F128, - RTLIB::FMIN_PPCF128)); + ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128, Results); break; case ISD::FMAXNUM: case ISD::STRICT_FMAXNUM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, - RTLIB::FMAX_F80, RTLIB::FMAX_F128, - RTLIB::FMAX_PPCF128)); + ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128, Results); break; case ISD::FSQRT: case ISD::STRICT_FSQRT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, - RTLIB::SQRT_F80, RTLIB::SQRT_F128, - RTLIB::SQRT_PPCF128)); + ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128, Results); break; case ISD::FCBRT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64, - RTLIB::CBRT_F80, RTLIB::CBRT_F128, - RTLIB::CBRT_PPCF128)); + ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64, + RTLIB::CBRT_F80, RTLIB::CBRT_F128, + RTLIB::CBRT_PPCF128, Results); break; case ISD::FSIN: case ISD::STRICT_FSIN: - Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, - RTLIB::SIN_F80, RTLIB::SIN_F128, - RTLIB::SIN_PPCF128)); + ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128, Results); break; case ISD::FCOS: case ISD::STRICT_FCOS: - Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, - RTLIB::COS_F80, RTLIB::COS_F128, - RTLIB::COS_PPCF128)); + ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128, Results); break; case ISD::FSINCOS: // Expand into sincos libcall. @@ -3855,181 +3977,204 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { case ISD::FLOG: case ISD::STRICT_FLOG: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32, - RTLIB::LOG_FINITE_F64, - RTLIB::LOG_FINITE_F80, - RTLIB::LOG_FINITE_F128, - RTLIB::LOG_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32, + RTLIB::LOG_FINITE_F64, + RTLIB::LOG_FINITE_F80, + RTLIB::LOG_FINITE_F128, + RTLIB::LOG_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_F128, - RTLIB::LOG_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128, Results); break; case ISD::FLOG2: case ISD::STRICT_FLOG2: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log2_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32, - RTLIB::LOG2_FINITE_F64, - RTLIB::LOG2_FINITE_F80, - RTLIB::LOG2_FINITE_F128, - RTLIB::LOG2_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32, + RTLIB::LOG2_FINITE_F64, + RTLIB::LOG2_FINITE_F80, + RTLIB::LOG2_FINITE_F128, + RTLIB::LOG2_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128, Results); break; case ISD::FLOG10: case ISD::STRICT_FLOG10: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log10_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32, - RTLIB::LOG10_FINITE_F64, - RTLIB::LOG10_FINITE_F80, - RTLIB::LOG10_FINITE_F128, - RTLIB::LOG10_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32, + RTLIB::LOG10_FINITE_F64, + RTLIB::LOG10_FINITE_F80, + RTLIB::LOG10_FINITE_F128, + RTLIB::LOG10_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, - RTLIB::LOG10_F80, RTLIB::LOG10_F128, - RTLIB::LOG10_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128, Results); break; case ISD::FEXP: case ISD::STRICT_FEXP: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32, - RTLIB::EXP_FINITE_F64, - RTLIB::EXP_FINITE_F80, - RTLIB::EXP_FINITE_F128, - RTLIB::EXP_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32, + RTLIB::EXP_FINITE_F64, + RTLIB::EXP_FINITE_F80, + RTLIB::EXP_FINITE_F128, + RTLIB::EXP_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, - RTLIB::EXP_F80, RTLIB::EXP_F128, - RTLIB::EXP_PPCF128)); + ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128, Results); break; case ISD::FEXP2: case ISD::STRICT_FEXP2: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp2_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32, - RTLIB::EXP2_FINITE_F64, - RTLIB::EXP2_FINITE_F80, - RTLIB::EXP2_FINITE_F128, - RTLIB::EXP2_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32, + RTLIB::EXP2_FINITE_F64, + RTLIB::EXP2_FINITE_F80, + RTLIB::EXP2_FINITE_F128, + RTLIB::EXP2_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, - RTLIB::EXP2_F80, RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128)); + ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128, Results); break; case ISD::FTRUNC: case ISD::STRICT_FTRUNC: - Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, - RTLIB::TRUNC_PPCF128)); + ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128, Results); break; case ISD::FFLOOR: case ISD::STRICT_FFLOOR: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, - RTLIB::FLOOR_PPCF128)); + ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128, Results); break; case ISD::FCEIL: case ISD::STRICT_FCEIL: - Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, - RTLIB::CEIL_F80, RTLIB::CEIL_F128, - RTLIB::CEIL_PPCF128)); + ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128, Results); break; case ISD::FRINT: case ISD::STRICT_FRINT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, - RTLIB::RINT_F80, RTLIB::RINT_F128, - RTLIB::RINT_PPCF128)); + ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128, Results); break; case ISD::FNEARBYINT: case ISD::STRICT_FNEARBYINT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, - RTLIB::NEARBYINT_F64, - RTLIB::NEARBYINT_F80, - RTLIB::NEARBYINT_F128, - RTLIB::NEARBYINT_PPCF128)); + ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128, Results); break; case ISD::FROUND: case ISD::STRICT_FROUND: - Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, - RTLIB::ROUND_F64, - RTLIB::ROUND_F80, - RTLIB::ROUND_F128, - RTLIB::ROUND_PPCF128)); + ExpandFPLibCall(Node, RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128, Results); break; case ISD::FPOWI: - case ISD::STRICT_FPOWI: - Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, - RTLIB::POWI_F80, RTLIB::POWI_F128, - RTLIB::POWI_PPCF128)); + case ISD::STRICT_FPOWI: { + RTLIB::Libcall LC; + switch (Node->getSimpleValueType(0).SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::f32: LC = RTLIB::POWI_F32; break; + case MVT::f64: LC = RTLIB::POWI_F64; break; + case MVT::f80: LC = RTLIB::POWI_F80; break; + case MVT::f128: LC = RTLIB::POWI_F128; break; + case MVT::ppcf128: LC = RTLIB::POWI_PPCF128; break; + } + if (!TLI.getLibcallName(LC)) { + // Some targets don't have a powi libcall; use pow instead. + SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), + Node->getValueType(0), + Node->getOperand(1)); + Results.push_back(DAG.getNode(ISD::FPOW, SDLoc(Node), + Node->getValueType(0), Node->getOperand(0), + Exponent)); + break; + } + ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128, Results); break; + } case ISD::FPOW: case ISD::STRICT_FPOW: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_pow_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32, - RTLIB::POW_FINITE_F64, - RTLIB::POW_FINITE_F80, - RTLIB::POW_FINITE_F128, - RTLIB::POW_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32, + RTLIB::POW_FINITE_F64, + RTLIB::POW_FINITE_F80, + RTLIB::POW_FINITE_F128, + RTLIB::POW_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_F128, - RTLIB::POW_PPCF128)); + ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128, Results); break; case ISD::LROUND: case ISD::STRICT_LROUND: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, - RTLIB::LROUND_F64, RTLIB::LROUND_F80, - RTLIB::LROUND_F128, - RTLIB::LROUND_PPCF128)); + ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, + RTLIB::LROUND_F64, RTLIB::LROUND_F80, + RTLIB::LROUND_F128, + RTLIB::LROUND_PPCF128, Results); break; case ISD::LLROUND: case ISD::STRICT_LLROUND: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, - RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, - RTLIB::LLROUND_F128, - RTLIB::LLROUND_PPCF128)); + ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, + RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, + RTLIB::LLROUND_F128, + RTLIB::LLROUND_PPCF128, Results); break; case ISD::LRINT: case ISD::STRICT_LRINT: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, - RTLIB::LRINT_F64, RTLIB::LRINT_F80, - RTLIB::LRINT_F128, - RTLIB::LRINT_PPCF128)); + ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, + RTLIB::LRINT_F64, RTLIB::LRINT_F80, + RTLIB::LRINT_F128, + RTLIB::LRINT_PPCF128, Results); break; case ISD::LLRINT: case ISD::STRICT_LLRINT: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, - RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, - RTLIB::LLRINT_F128, - RTLIB::LLRINT_PPCF128)); + ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, + RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, + RTLIB::LLRINT_F128, + RTLIB::LLRINT_PPCF128, Results); break; case ISD::FDIV: - Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, - RTLIB::DIV_F80, RTLIB::DIV_F128, - RTLIB::DIV_PPCF128)); + case ISD::STRICT_FDIV: + ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, + RTLIB::DIV_F80, RTLIB::DIV_F128, + RTLIB::DIV_PPCF128, Results); break; case ISD::FREM: case ISD::STRICT_FREM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, - RTLIB::REM_F80, RTLIB::REM_F128, - RTLIB::REM_PPCF128)); + ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128, Results); break; case ISD::FMA: case ISD::STRICT_FMA: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, - RTLIB::FMA_F80, RTLIB::FMA_F128, - RTLIB::FMA_PPCF128)); + ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, + RTLIB::FMA_F80, RTLIB::FMA_F128, + RTLIB::FMA_PPCF128, Results); break; case ISD::FADD: - Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, - RTLIB::ADD_F80, RTLIB::ADD_F128, - RTLIB::ADD_PPCF128)); + case ISD::STRICT_FADD: + ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128, Results); break; case ISD::FMUL: - Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, - RTLIB::MUL_F80, RTLIB::MUL_F128, - RTLIB::MUL_PPCF128)); + case ISD::STRICT_FMUL: + ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, + RTLIB::MUL_F80, RTLIB::MUL_F128, + RTLIB::MUL_PPCF128, Results); break; case ISD::FP16_TO_FP: if (Node->getValueType(0) == MVT::f32) { @@ -4044,9 +4189,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { break; } case ISD::FSUB: - Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, - RTLIB::SUB_F80, RTLIB::SUB_F128, - RTLIB::SUB_PPCF128)); + case ISD::STRICT_FSUB: + ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, + RTLIB::SUB_F80, RTLIB::SUB_F128, + RTLIB::SUB_PPCF128, Results); break; case ISD::SREM: Results.push_back(ExpandIntLibCall(Node, true, @@ -4129,6 +4275,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Node->getOpcode() == ISD::INSERT_VECTOR_ELT) { OVT = Node->getOperand(0).getSimpleValueType(); } + if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP || + Node->getOpcode() == ISD::STRICT_SINT_TO_FP) + OVT = Node->getOperand(1).getSimpleValueType(); if (Node->getOpcode() == ISD::BR_CC) OVT = Node->getOperand(2).getSimpleValueType(); MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); @@ -4177,16 +4326,16 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { break; } case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: - Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0), - Node->getOpcode() == ISD::FP_TO_SINT, dl); - Results.push_back(Tmp1); + case ISD::STRICT_FP_TO_SINT: + PromoteLegalFP_TO_INT(Node, dl, Results); break; case ISD::UINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: case ISD::SINT_TO_FP: - Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0), - Node->getOpcode() == ISD::SINT_TO_FP, dl); - Results.push_back(Tmp1); + case ISD::STRICT_SINT_TO_FP: + PromoteLegalINT_TO_FP(Node, dl, Results); break; case ISD::VAARG: { SDValue Chain = Node->getOperand(0); // Get the chain. @@ -4358,6 +4507,21 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3, DAG.getIntPtrConstant(0, dl))); break; + case ISD::STRICT_FREM: + case ISD::STRICT_FPOW: + Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); + Tmp2 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(2)}); + Tmp3 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Tmp1.getValue(1), + Tmp2.getValue(1)); + Tmp1 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, + {Tmp3, Tmp1, Tmp2}); + Tmp1 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other}, + {Tmp1.getValue(1), Tmp1, DAG.getIntPtrConstant(0, dl)}); + Results.push_back(Tmp1); + Results.push_back(Tmp1.getValue(1)); + break; case ISD::FMA: Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); @@ -4404,6 +4568,22 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2, DAG.getIntPtrConstant(0, dl))); break; + case ISD::STRICT_FFLOOR: + case ISD::STRICT_FCEIL: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FEXP: + Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); + Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, + {Tmp1.getValue(1), Tmp1}); + Tmp3 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other}, + {Tmp2.getValue(1), Tmp2, DAG.getIntPtrConstant(0, dl)}); + Results.push_back(Tmp3); + Results.push_back(Tmp3.getValue(1)); + break; case ISD::BUILD_VECTOR: { MVT EltVT = OVT.getVectorElementType(); MVT NewEltVT = NVT.getVectorElementType(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 72d052473f11..f191160dee4f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -65,39 +65,68 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_VECTOR_ELT: R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break; case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::STRICT_FMINNUM: case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; + case ISD::STRICT_FMAXNUM: case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; + case ISD::STRICT_FADD: case ISD::FADD: R = SoftenFloatRes_FADD(N); break; + case ISD::FCBRT: R = SoftenFloatRes_FCBRT(N); break; + case ISD::STRICT_FCEIL: case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; + case ISD::STRICT_FCOS: case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; + case ISD::STRICT_FDIV: case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; + case ISD::STRICT_FEXP: case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; + case ISD::STRICT_FEXP2: case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break; + case ISD::STRICT_FFLOOR: case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break; + case ISD::STRICT_FLOG: case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break; + case ISD::STRICT_FLOG2: case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break; + case ISD::STRICT_FLOG10: case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break; + case ISD::STRICT_FMA: case ISD::FMA: R = SoftenFloatRes_FMA(N); break; + case ISD::STRICT_FMUL: case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; + case ISD::STRICT_FNEARBYINT: case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; + case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; + case ISD::STRICT_FPOW: case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break; + case ISD::STRICT_FPOWI: case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; + case ISD::STRICT_FREM: case ISD::FREM: R = SoftenFloatRes_FREM(N); break; + case ISD::STRICT_FRINT: case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; + case ISD::STRICT_FROUND: case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break; + case ISD::STRICT_FSIN: case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; + case ISD::STRICT_FSQRT: case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; + case ISD::STRICT_FSUB: case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; + case ISD::STRICT_FTRUNC: case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; @@ -111,6 +140,46 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { } } +SDValue DAGTypeLegalizer::SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC) { + bool IsStrict = N->isStrictFPOpcode(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned Offset = IsStrict ? 1 : 0; + assert(N->getNumOperands() == (1 + Offset) && + "Unexpected number of operands!"); + SDValue Op = GetSoftenedFloat(N->getOperand(0 + Offset)); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpVT = N->getOperand(0 + Offset).getValueType(); + CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC) { + bool IsStrict = N->isStrictFPOpcode(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned Offset = IsStrict ? 1 : 0; + assert(N->getNumOperands() == (2 + Offset) && + "Unexpected number of operands!"); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0 + Offset)), + GetSoftenedFloat(N->getOperand(1 + Offset)) }; + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(), + N->getOperand(1 + Offset).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Ops, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; +} + SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { return BitConvertToInteger(N->getOperand(0)); } @@ -174,69 +243,48 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::FMIN_F32, - RTLIB::FMIN_F64, - RTLIB::FMIN_F80, - RTLIB::FMIN_F128, - RTLIB::FMIN_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FMIN_F32, + RTLIB::FMIN_F64, + RTLIB::FMIN_F80, + RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::FMAX_F32, - RTLIB::FMAX_F64, - RTLIB::FMAX_F80, - RTLIB::FMAX_F128, - RTLIB::FMAX_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FMAX_F32, + RTLIB::FMAX_F64, + RTLIB::FMAX_F80, + RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::ADD_F32, - RTLIB::ADD_F64, - RTLIB::ADD_F80, - RTLIB::ADD_F128, - RTLIB::ADD_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, + RTLIB::ADD_F64, + RTLIB::ADD_F80, + RTLIB::ADD_F128, + RTLIB::ADD_PPCF128)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCBRT(SDNode *N) { + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::CBRT_F32, + RTLIB::CBRT_F64, + RTLIB::CBRT_F80, + RTLIB::CBRT_F128, + RTLIB::CBRT_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::CEIL_F32, - RTLIB::CEIL_F64, - RTLIB::CEIL_F80, - RTLIB::CEIL_F128, - RTLIB::CEIL_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, + RTLIB::CEIL_F64, + RTLIB::CEIL_F80, + RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { @@ -288,231 +336,170 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::COS_F32, - RTLIB::COS_F64, - RTLIB::COS_F80, - RTLIB::COS_F128, - RTLIB::COS_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, + RTLIB::COS_F64, + RTLIB::COS_F80, + RTLIB::COS_F128, + RTLIB::COS_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::DIV_F32, - RTLIB::DIV_F64, - RTLIB::DIV_F80, - RTLIB::DIV_F128, - RTLIB::DIV_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_F128, + RTLIB::DIV_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::EXP_F32, - RTLIB::EXP_F64, - RTLIB::EXP_F80, - RTLIB::EXP_F128, - RTLIB::EXP_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, + RTLIB::EXP_F64, + RTLIB::EXP_F80, + RTLIB::EXP_F128, + RTLIB::EXP_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::EXP2_F32, - RTLIB::EXP2_F64, - RTLIB::EXP2_F80, - RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, + RTLIB::EXP2_F64, + RTLIB::EXP2_F80, + RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::FLOOR_F32, - RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, - RTLIB::FLOOR_F128, - RTLIB::FLOOR_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, + RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, + RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::LOG_F32, - RTLIB::LOG_F64, - RTLIB::LOG_F80, - RTLIB::LOG_F128, - RTLIB::LOG_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, + RTLIB::LOG_F64, + RTLIB::LOG_F80, + RTLIB::LOG_F128, + RTLIB::LOG_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::LOG2_F32, - RTLIB::LOG2_F64, - RTLIB::LOG2_F80, - RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, + RTLIB::LOG2_F64, + RTLIB::LOG2_F80, + RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::LOG10_F32, - RTLIB::LOG10_F64, - RTLIB::LOG10_F80, - RTLIB::LOG10_F128, - RTLIB::LOG10_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, + RTLIB::LOG10_F64, + RTLIB::LOG10_F80, + RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { + bool IsStrict = N->isStrictFPOpcode(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)), - GetSoftenedFloat(N->getOperand(2)) }; + unsigned Offset = IsStrict ? 1 : 0; + SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0 + Offset)), + GetSoftenedFloat(N->getOperand(1 + Offset)), + GetSoftenedFloat(N->getOperand(2 + Offset)) }; + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[3] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType(), - N->getOperand(2).getValueType() }; + EVT OpsVT[3] = { N->getOperand(0 + Offset).getValueType(), + N->getOperand(1 + Offset).getValueType(), + N->getOperand(2 + Offset).getValueType() }; CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::FMA_F32, - RTLIB::FMA_F64, - RTLIB::FMA_F80, - RTLIB::FMA_F128, - RTLIB::FMA_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, + GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_F128, + RTLIB::FMA_PPCF128), + NVT, Ops, CallOptions, SDLoc(N), Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::MUL_F32, - RTLIB::MUL_F64, - RTLIB::MUL_F80, - RTLIB::MUL_F128, - RTLIB::MUL_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_F128, + RTLIB::MUL_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::NEARBYINT_F32, - RTLIB::NEARBYINT_F64, - RTLIB::NEARBYINT_F80, - RTLIB::NEARBYINT_F128, - RTLIB::NEARBYINT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); - EVT FloatVT = N->getValueType(0); - if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) { - // Expand Y = FNEG(X) -> Y = X ^ sign mask - APInt SignMask = APInt::getSignMask(NVT.getSizeInBits()); - return DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)), - DAG.getConstant(SignMask, dl, NVT)); - } - - // Expand Y = FNEG(X) -> Y = SUB -0.0, X - SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)), - GetSoftenedFloat(N->getOperand(0)) }; - TargetLowering::MakeLibCallOptions CallOptions; - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_F128, - RTLIB::SUB_PPCF128), - NVT, Ops, CallOptions, dl).first; + // Expand Y = FNEG(X) -> Y = X ^ sign mask + APInt SignMask = APInt::getSignMask(NVT.getSizeInBits()); + return DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)), + DAG.getConstant(SignMask, dl, NVT)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { + bool IsStrict = N->isStrictFPOpcode(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = N->getOperand(0); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); - // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's - // entirely possible for both f16 and f32 to be legal, so use the fully - // hard-float FP_EXTEND rather than FP16_TO_FP. - if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) { - Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op); - if (getTypeAction(MVT::f32) == TargetLowering::TypeSoftenFloat) - AddToWorklist(Op.getNode()); - } + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) { Op = GetPromotedFloat(Op); // If the promotion did the FP_EXTEND to the destination type for us, // there's nothing left to do here. - if (Op.getValueType() == N->getValueType(0)) { + if (Op.getValueType() == N->getValueType(0)) return BitConvertToInteger(Op); + } + + // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's + // entirely possible for both f16 and f32 to be legal, so use the fully + // hard-float FP_EXTEND rather than FP16_TO_FP. + if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) { + if (IsStrict) { + Op = DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N), + { MVT::f32, MVT::Other }, { Chain, Op }); + Chain = Op.getValue(1); + } else { + Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op); } } RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; + EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType(); + CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -535,167 +522,127 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { + bool IsStrict = N->isStrictFPOpcode(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = N->getOperand(0); - if (N->getValueType(0) == MVT::f16) { - // Semi-soften first, to FP_TO_FP16, so that targets which support f16 as a - // storage-only type get a chance to select things. - return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, Op); - } - + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; + EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType(); + CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::POW_F32, - RTLIB::POW_F64, - RTLIB::POW_F80, - RTLIB::POW_F128, - RTLIB::POW_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, + RTLIB::POW_F64, + RTLIB::POW_F80, + RTLIB::POW_F128, + RTLIB::POW_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { - assert(N->getOperand(1).getValueType() == MVT::i32 && + bool IsStrict = N->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + assert(N->getOperand(1 + Offset).getValueType() == MVT::i32 && "Unsupported power type!"); + RTLIB::Libcall LC = GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, + RTLIB::POWI_F64, + RTLIB::POWI_F80, + RTLIB::POWI_F128, + RTLIB::POWI_PPCF128); + if (!TLI.getLibcallName(LC)) { + // Some targets don't have a powi libcall; use pow instead. + // FIXME: Implement this if some target needs it. + DAG.getContext()->emitError("Don't know how to soften fpowi to fpow"); + return DAG.getUNDEF(N->getValueType(0)); + } + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0 + Offset)), + N->getOperand(1 + Offset) }; + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; + EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(), + N->getOperand(1 + Offset).getValueType() }; CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::POWI_F32, - RTLIB::POWI_F64, - RTLIB::POWI_F80, - RTLIB::POWI_F128, - RTLIB::POWI_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Ops, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::REM_F32, - RTLIB::REM_F64, - RTLIB::REM_F80, - RTLIB::REM_F128, - RTLIB::REM_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, + RTLIB::REM_F64, + RTLIB::REM_F80, + RTLIB::REM_F128, + RTLIB::REM_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::RINT_F32, - RTLIB::RINT_F64, - RTLIB::RINT_F80, - RTLIB::RINT_F128, - RTLIB::RINT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, + RTLIB::RINT_F64, + RTLIB::RINT_F80, + RTLIB::RINT_F128, + RTLIB::RINT_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::ROUND_F32, - RTLIB::ROUND_F64, - RTLIB::ROUND_F80, - RTLIB::ROUND_F128, - RTLIB::ROUND_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SIN_F32, - RTLIB::SIN_F64, - RTLIB::SIN_F80, - RTLIB::SIN_F128, - RTLIB::SIN_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, + RTLIB::SIN_F64, + RTLIB::SIN_F80, + RTLIB::SIN_F128, + RTLIB::SIN_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SQRT_F32, - RTLIB::SQRT_F64, - RTLIB::SQRT_F80, - RTLIB::SQRT_F128, - RTLIB::SQRT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, + RTLIB::SQRT_F64, + RTLIB::SQRT_F80, + RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_F128, - RTLIB::SUB_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - if (N->getValueType(0) == MVT::f16) - return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0)); - - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::TRUNC_F32, - RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, - RTLIB::TRUNC_F128, - RTLIB::TRUNC_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, + RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, + RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { @@ -715,8 +662,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. - if (N != NewL.getValue(1).getNode()) - ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); return NewL; } @@ -771,8 +717,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { - bool Signed = N->getOpcode() == ISD::SINT_TO_FP; - EVT SVT = N->getOperand(0).getValueType(); + bool IsStrict = N->isStrictFPOpcode(); + bool Signed = N->getOpcode() == ISD::SINT_TO_FP || + N->getOpcode() == ISD::STRICT_SINT_TO_FP; + EVT SVT = N->getOperand(IsStrict ? 1 : 0).getValueType(); EVT RVT = N->getValueType(0); EVT NVT = EVT(); SDLoc dl(N); @@ -790,16 +738,20 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); // Sign/zero extend the argument if the libcall takes a larger type. SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, - NVT, N->getOperand(0)); + NVT, N->getOperand(IsStrict ? 1 : 0)); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(Signed); - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, - TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - Op, CallOptions, dl).first; + CallOptions.setTypeListBeforeSoften(SVT, RVT, true); + std::pair<SDValue, SDValue> Tmp = + TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), + Op, CallOptions, dl, Chain); + + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } @@ -822,18 +774,27 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; - case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break; + case ISD::STRICT_LROUND: case ISD::LROUND: Res = SoftenFloatOp_LROUND(N); break; + case ISD::STRICT_LLROUND: case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break; + case ISD::STRICT_LRINT: case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break; + case ISD::STRICT_LLRINT: case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; + case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break; } // If the result is null, the sub-method took care of registering results etc. @@ -845,7 +806,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { return true; assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && - "Invalid operand promotion"); + "Invalid operand softening"); ReplaceValueWith(SDValue(N, 0), Res); return false; @@ -857,42 +818,34 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0); } -SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { - // If we get here, the result must be legal but the source illegal. - EVT SVT = N->getOperand(0).getValueType(); - EVT RVT = N->getValueType(0); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - - if (SVT == MVT::f16) - return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), RVT, Op); - - RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall"); - - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; -} - - SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { // We actually deal with the partially-softened FP_TO_FP16 node too, which // returns an i16 so doesn't meet the constraints necessary for FP_ROUND. - assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16); + assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 || + N->getOpcode() == ISD::STRICT_FP_ROUND); - EVT SVT = N->getOperand(0).getValueType(); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + EVT SVT = Op.getValueType(); EVT RVT = N->getValueType(0); EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT; RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + Op = GetSoftenedFloat(Op); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; + CallOptions.setTypeListBeforeSoften(SVT, RVT, true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) { + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); + } + return Tmp.first; } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { @@ -920,8 +873,12 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { - bool Signed = N->getOpcode() == ISD::FP_TO_SINT; - EVT SVT = N->getOperand(0).getValueType(); + bool IsStrict = N->isStrictFPOpcode(); + bool Signed = N->getOpcode() == ISD::FP_TO_SINT || + N->getOpcode() == ISD::STRICT_FP_TO_SINT; + + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + EVT SVT = Op.getValueType(); EVT RVT = N->getValueType(0); EVT NVT = EVT(); SDLoc dl(N); @@ -937,18 +894,26 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { NVT = (MVT::SimpleValueType)IntVT; // The type needs to big enough to hold the result. if (NVT.bitsGE(RVT)) - LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT):RTLIB::getFPTOUINT(SVT, NVT); + LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT) : RTLIB::getFPTOUINT(SVT, NVT); } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!"); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); + Op = GetSoftenedFloat(Op); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl).first; + CallOptions.setTypeListBeforeSoften(SVT, RVT, true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, + CallOptions, dl, Chain); // Truncate the result if the libcall returns a larger type. - return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res); + SDValue Res = DAG.getNode(ISD::TRUNCATE, dl, RVT, Tmp.first); + + if (!IsStrict) + return Res; + + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); } SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { @@ -976,26 +941,39 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { - SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); - ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op0 = N->getOperand(IsStrict ? 1 : 0); + SDValue Op1 = N->getOperand(IsStrict ? 2 : 1); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + ISD::CondCode CCCode = + cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get(); + + EVT VT = Op0.getValueType(); + SDValue NewLHS = GetSoftenedFloat(Op0); + SDValue NewRHS = GetSoftenedFloat(Op1); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), Op0, Op1, + Chain, N->getOpcode() == ISD::STRICT_FSETCCS); - EVT VT = NewLHS.getValueType(); - NewLHS = GetSoftenedFloat(NewLHS); - NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), - N->getOperand(0), N->getOperand(1)); - - // If softenSetCCOperands returned a scalar, use it. - if (!NewRHS.getNode()) { - assert(NewLHS.getValueType() == N->getValueType(0) && - "Unexpected setcc expansion!"); - return NewLHS; + // Update N to have the operands specified. + if (NewRHS.getNode()) { + if (IsStrict) + NewLHS = DAG.getNode(ISD::SETCC, SDLoc(N), N->getValueType(0), NewLHS, + NewRHS, DAG.getCondCode(CCCode)); + else + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), 0); } - // Otherwise, update N to have the operands specified. - return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, - DAG.getCondCode(CCCode)), - 0); + // Otherwise, softenSetCCOperands returned a scalar, use it. + assert((NewRHS.getNode() || NewLHS.getValueType() == N->getValueType(0)) && + "Unexpected setcc expansion!"); + + if (IsStrict) { + ReplaceValueWith(SDValue(N, 0), NewLHS); + ReplaceValueWith(SDValue(N, 1), Chain); + return SDValue(); + } + return NewLHS; } SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { @@ -1016,72 +994,99 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { ST->getMemOperand()); } -SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); +SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = BitConvertToInteger(N->getOperand(1)); + SDLoc dl(N); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, - RTLIB::LROUND_F32, - RTLIB::LROUND_F64, - RTLIB::LROUND_F80, - RTLIB::LROUND_F128, - RTLIB::LROUND_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + EVT LVT = LHS.getValueType(); + EVT ILVT = EVT::getIntegerVT(*DAG.getContext(), LVT.getSizeInBits()); + EVT RVT = RHS.getValueType(); + + unsigned LSize = LVT.getSizeInBits(); + unsigned RSize = RVT.getSizeInBits(); + + // Shift right or sign-extend it if the two operands have different types. + int SizeDiff = RSize - LSize; + if (SizeDiff > 0) { + RHS = + DAG.getNode(ISD::SRL, dl, RVT, RHS, + DAG.getConstant(SizeDiff, dl, + TLI.getShiftAmountTy(RHS.getValueType(), + DAG.getDataLayout()))); + RHS = DAG.getNode(ISD::TRUNCATE, dl, ILVT, RHS); + } else if (SizeDiff < 0) { + RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, RHS); + RHS = + DAG.getNode(ISD::SHL, dl, ILVT, RHS, + DAG.getConstant(-SizeDiff, dl, + TLI.getShiftAmountTy(RHS.getValueType(), + DAG.getDataLayout()))); + } + + RHS = DAG.getBitcast(LVT, RHS); + return DAG.getNode(ISD::FCOPYSIGN, dl, LVT, LHS, RHS); } -SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatOp_Unary(SDNode *N, RTLIB::Libcall LC) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + bool IsStrict = N->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + SDValue Op = GetSoftenedFloat(N->getOperand(0 + Offset)); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, - RTLIB::LLROUND_F32, - RTLIB::LLROUND_F64, - RTLIB::LLROUND_F80, - RTLIB::LLROUND_F128, - RTLIB::LLROUND_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + EVT OpVT = N->getOperand(0 + Offset).getValueType(); + CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) { + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); + } + + return Tmp.first; } -SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); +SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) { + EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType(); + return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, + RTLIB::LROUND_F32, + RTLIB::LROUND_F64, + RTLIB::LROUND_F80, + RTLIB::LROUND_F128, + RTLIB::LROUND_PPCF128)); +} - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, - RTLIB::LRINT_F32, - RTLIB::LRINT_F64, - RTLIB::LRINT_F80, - RTLIB::LRINT_F128, - RTLIB::LRINT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; +SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { + EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType(); + return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, + RTLIB::LLROUND_F32, + RTLIB::LLROUND_F64, + RTLIB::LLROUND_F80, + RTLIB::LLROUND_F128, + RTLIB::LLROUND_PPCF128)); } -SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); +SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { + EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType(); + return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, + RTLIB::LRINT_F32, + RTLIB::LRINT_F64, + RTLIB::LRINT_F80, + RTLIB::LRINT_F128, + RTLIB::LRINT_PPCF128)); +} - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, - RTLIB::LLRINT_F32, - RTLIB::LLRINT_F64, - RTLIB::LLRINT_F80, - RTLIB::LLRINT_F128, - RTLIB::LLRINT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; +SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { + EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType(); + return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, + RTLIB::LLRINT_F32, + RTLIB::LLRINT_F64, + RTLIB::LLRINT_F80, + RTLIB::LLRINT_F128, + RTLIB::LLRINT_PPCF128)); } //===----------------------------------------------------------------------===// @@ -1122,35 +1127,61 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break; case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break; + case ISD::STRICT_FMINNUM: case ISD::FMINNUM: ExpandFloatRes_FMINNUM(N, Lo, Hi); break; + case ISD::STRICT_FMAXNUM: case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break; + case ISD::STRICT_FADD: case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break; + case ISD::FCBRT: ExpandFloatRes_FCBRT(N, Lo, Hi); break; + case ISD::STRICT_FCEIL: case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break; case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break; + case ISD::STRICT_FCOS: case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break; + case ISD::STRICT_FDIV: case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break; + case ISD::STRICT_FEXP: case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break; + case ISD::STRICT_FEXP2: case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break; + case ISD::STRICT_FFLOOR: case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break; + case ISD::STRICT_FLOG: case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break; + case ISD::STRICT_FLOG2: case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break; + case ISD::STRICT_FLOG10: case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break; + case ISD::STRICT_FMA: case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break; + case ISD::STRICT_FMUL: case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break; + case ISD::STRICT_FNEARBYINT: case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break; case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break; + case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break; + case ISD::STRICT_FPOW: case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break; + case ISD::STRICT_FPOWI: case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break; + case ISD::STRICT_FRINT: case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break; + case ISD::STRICT_FROUND: case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break; + case ISD::STRICT_FSIN: case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; + case ISD::STRICT_FSQRT: case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; + case ISD::STRICT_FSUB: case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; + case ISD::STRICT_FTRUNC: case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break; case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break; + case ISD::STRICT_FREM: case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break; } @@ -1174,6 +1205,36 @@ void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, dl, NVT); } +void DAGTypeLegalizer::ExpandFloatRes_Unary(SDNode *N, RTLIB::Libcall LC, + SDValue &Lo, SDValue &Hi) { + bool IsStrict = N->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + SDValue Op = N->getOperand(0 + Offset); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + TargetLowering::MakeLibCallOptions CallOptions; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, N->getValueType(0), + Op, CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + GetPairElements(Tmp.first, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC, + SDValue &Lo, SDValue &Hi) { + bool IsStrict = N->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + SDValue Ops[] = { N->getOperand(0 + Offset), N->getOperand(1 + Offset) }; + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + TargetLowering::MakeLibCallOptions CallOptions; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, N->getValueType(0), + Ops, CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + GetPairElements(Tmp.first, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(N->getValueType(0) == MVT::ppcf128 && @@ -1190,181 +1251,159 @@ void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_FMINNUM(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::FMIN_F32, RTLIB::FMIN_F64, - RTLIB::FMIN_F80, RTLIB::FMIN_F128, - RTLIB::FMIN_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FMAXNUM(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::FMAX_F32, RTLIB::FMAX_F64, - RTLIB::FMAX_F80, RTLIB::FMAX_F128, - RTLIB::FMAX_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::ADD_F32, RTLIB::ADD_F64, - RTLIB::ADD_F80, RTLIB::ADD_F128, - RTLIB::ADD_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCBRT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32, + RTLIB::CBRT_F64, RTLIB::CBRT_F80, + RTLIB::CBRT_F128, + RTLIB::CBRT_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::CEIL_F32, RTLIB::CEIL_F64, - RTLIB::CEIL_F80, RTLIB::CEIL_F128, - RTLIB::CEIL_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::COPYSIGN_F32, - RTLIB::COPYSIGN_F64, - RTLIB::COPYSIGN_F80, - RTLIB::COPYSIGN_F128, - RTLIB::COPYSIGN_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::COPYSIGN_F32, + RTLIB::COPYSIGN_F64, + RTLIB::COPYSIGN_F80, + RTLIB::COPYSIGN_F128, + RTLIB::COPYSIGN_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::COS_F32, RTLIB::COS_F64, - RTLIB::COS_F80, RTLIB::COS_F128, - RTLIB::COS_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - TargetLowering::MakeLibCallOptions CallOptions; - SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::DIV_F32, - RTLIB::DIV_F64, - RTLIB::DIV_F80, - RTLIB::DIV_F128, - RTLIB::DIV_PPCF128), - N->getValueType(0), Ops, CallOptions, - SDLoc(N)).first; - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_F128, + RTLIB::DIV_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::EXP_F32, RTLIB::EXP_F64, - RTLIB::EXP_F80, RTLIB::EXP_F128, - RTLIB::EXP_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::EXP2_F32, RTLIB::EXP2_F64, - RTLIB::EXP2_F80, RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, - RTLIB::FLOOR_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_F128, - RTLIB::LOG_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::LOG10_F32, RTLIB::LOG10_F64, - RTLIB::LOG10_F80, RTLIB::LOG10_F128, - RTLIB::LOG10_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + bool IsStrict = N->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + SDValue Ops[3] = { N->getOperand(0 + Offset), N->getOperand(1 + Offset), + N->getOperand(2 + Offset) }; + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), N->getValueType(0), Ops, CallOptions, - SDLoc(N)).first; - GetPairElements(Call, Lo, Hi); + SDLoc(N), Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + GetPairElements(Tmp.first, Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - TargetLowering::MakeLibCallOptions CallOptions; - SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), RTLIB::MUL_F32, RTLIB::MUL_F64, RTLIB::MUL_F80, RTLIB::MUL_F128, - RTLIB::MUL_PPCF128), - N->getValueType(0), Ops, CallOptions, - SDLoc(N)).first; - GetPairElements(Call, Lo, Hi); + RTLIB::MUL_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::NEARBYINT_F32, - RTLIB::NEARBYINT_F64, - RTLIB::NEARBYINT_F80, - RTLIB::NEARBYINT_F128, - RTLIB::NEARBYINT_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, @@ -1379,106 +1418,105 @@ void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); - Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0)); + bool IsStrict = N->isStrictFPOpcode(); + + SDValue Chain; + if (IsStrict) { + // If the expanded type is the same as the input type, just bypass the node. + if (NVT == N->getOperand(1).getValueType()) { + Hi = N->getOperand(1); + Chain = N->getOperand(0); + } else { + // Other we need to extend. + Hi = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, { NVT, MVT::Other }, + { N->getOperand(0), N->getOperand(1) }); + Chain = Hi.getValue(1); + } + } else { + Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0)); + } + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), APInt(NVT.getSizeInBits(), 0)), dl, NVT); + + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Chain); } void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_F128, - RTLIB::POW_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::POWI_F32, RTLIB::POWI_F64, - RTLIB::POWI_F80, RTLIB::POWI_F128, - RTLIB::POWI_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::REM_F32, RTLIB::REM_F64, - RTLIB::REM_F80, RTLIB::REM_F128, - RTLIB::REM_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::RINT_F32, RTLIB::RINT_F64, - RTLIB::RINT_F80, RTLIB::RINT_F128, - RTLIB::RINT_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::ROUND_F32, - RTLIB::ROUND_F64, - RTLIB::ROUND_F80, - RTLIB::ROUND_F128, - RTLIB::ROUND_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::SIN_F32, RTLIB::SIN_F64, - RTLIB::SIN_F80, RTLIB::SIN_F128, - RTLIB::SIN_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::SQRT_F32, RTLIB::SQRT_F64, - RTLIB::SQRT_F80, RTLIB::SQRT_F128, - RTLIB::SQRT_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - TargetLowering::MakeLibCallOptions CallOptions; - SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_F128, - RTLIB::SUB_PPCF128), - N->getValueType(0), Ops, CallOptions, - SDLoc(N)).first; - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, - RTLIB::TRUNC_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, @@ -1619,8 +1657,11 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; case ISD::FCOPYSIGN: Res = ExpandFloatOp_FCOPYSIGN(N); break; + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; case ISD::LROUND: Res = ExpandFloatOp_LROUND(N); break; case ISD::LLROUND: Res = ExpandFloatOp_LLROUND(N); break; @@ -1709,34 +1750,72 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FCOPYSIGN(SDNode *N) { } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { - assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + bool IsStrict = N->isStrictFPOpcode(); + assert(N->getOperand(IsStrict ? 1 : 0).getValueType() == MVT::ppcf128 && "Logic only correct for ppcf128!"); SDValue Lo, Hi; - GetExpandedFloat(N->getOperand(0), Lo, Hi); - // Round it the rest of the way (e.g. to f32) if needed. - return DAG.getNode(ISD::FP_ROUND, SDLoc(N), - N->getValueType(0), Hi, N->getOperand(1)); + GetExpandedFloat(N->getOperand(IsStrict ? 1 : 0), Lo, Hi); + + if (!IsStrict) + // Round it the rest of the way (e.g. to f32) if needed. + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), + N->getValueType(0), Hi, N->getOperand(1)); + + // Eliminate the node if the input float type is the same as the output float + // type. + if (Hi.getValueType() == N->getValueType(0)) { + // Connect the output chain to the input chain, unlinking the node. + ReplaceValueWith(SDValue(N, 1), N->getOperand(0)); + ReplaceValueWith(SDValue(N, 0), Hi); + return SDValue(); + } + + SDValue Expansion = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N), + {N->getValueType(0), MVT::Other}, + {N->getOperand(0), Hi, N->getOperand(2)}); + ReplaceValueWith(SDValue(N, 1), Expansion.getValue(1)); + ReplaceValueWith(SDValue(N, 0), Expansion); + return SDValue(); } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { EVT RVT = N->getValueType(0); SDLoc dl(N); - RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); TargetLowering::MakeLibCallOptions CallOptions; - return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), CallOptions, dl).first; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op, + CallOptions, dl, Chain); + if (!IsStrict) + return Tmp.first; + + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { EVT RVT = N->getValueType(0); SDLoc dl(N); - RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); TargetLowering::MakeLibCallOptions CallOptions; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0), - CallOptions, dl).first; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op, + CallOptions, dl, Chain); + if (!IsStrict) + return Tmp.first; + + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); } SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { @@ -1800,7 +1879,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) { EVT RVT = N->getValueType(0); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + EVT RetVT = N->getOperand(0).getValueType(); TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LROUND_F32, @@ -1813,7 +1892,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) { SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) { EVT RVT = N->getValueType(0); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + EVT RetVT = N->getOperand(0).getValueType(); TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLROUND_F32, @@ -1826,7 +1905,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) { SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) { EVT RVT = N->getValueType(0); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + EVT RetVT = N->getOperand(0).getValueType(); TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LRINT_F32, @@ -1839,7 +1918,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) { SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) { EVT RVT = N->getValueType(0); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + EVT RetVT = N->getOperand(0).getValueType(); TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLRINT_F32, @@ -1963,12 +2042,11 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo) { // code. SDValue DAGTypeLegalizer::PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo) { EVT VT = N->getValueType(0); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Op0 = GetPromotedFloat(N->getOperand(0)); SDValue Op1 = GetPromotedFloat(N->getOperand(1)); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); - return DAG.getSetCC(SDLoc(N), NVT, Op0, Op1, CCCode); + return DAG.getSetCC(SDLoc(N), VT, Op0, Op1, CCCode); } @@ -2026,6 +2104,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { // Unary FP Operations case ISD::FABS: + case ISD::FCBRT: case ISD::FCEIL: case ISD::FCOS: case ISD::FEXP: @@ -2304,7 +2383,6 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) { SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) { EVT VT = N->getValueType(0); - EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); AtomicSDNode *AM = cast<AtomicSDNode>(N); SDLoc SL(N); @@ -2318,13 +2396,19 @@ SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) { { AM->getChain(), AM->getBasePtr(), CastVal }, AM->getMemOperand()); - SDValue ResultCast = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT, - NewAtomic); + SDValue Result = NewAtomic; + + if (getTypeAction(VT) == TargetLowering::TypePromoteFloat) { + EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + Result = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT, + NewAtomic); + } + // Legalize the chain result by replacing uses of the old value chain with the // new one ReplaceValueWith(SDValue(N, 1), NewAtomic.getValue(1)); - return ResultCast; + return Result; } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index d5c1b539adbd..0e46f8d68f83 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -75,6 +75,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break; case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break; case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; case ISD::SMIN: case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break; @@ -158,6 +160,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UMULFIX: case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break; + case ISD::SDIVFIX: + case ISD::UDIVFIX: Res = PromoteIntRes_DIVFIX(N); break; + case ISD::ABS: Res = PromoteIntRes_ABS(N); break; case ISD::ATOMIC_LOAD: @@ -337,8 +342,21 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { // The input is widened to the same size. Convert to the widened value. // Make sure that the outgoing value is not a vector, because this would // make us bitcast between two vectors which are legalized in different ways. - if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) - return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp)); + if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) { + SDValue Res = + DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp)); + + // For big endian targets we need to shift the casted value or the + // interesting bits will end up at the wrong place. + if (DAG.getDataLayout().isBigEndian()) { + unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits(); + EVT ShiftAmtTy = TLI.getShiftAmountTy(NOutVT, DAG.getDataLayout()); + assert(ShiftAmt < NOutVT.getSizeInBits() && "Too large shift amount!"); + Res = DAG.getNode(ISD::SRL, dl, NOutVT, Res, + DAG.getConstant(ShiftAmt, dl, ShiftAmtTy)); + } + return Res; + } // If the output type is also a vector and widening it to the same size // as the widened input type would be a legal type, we can widen the bitcast // and handle the promotion after. @@ -365,15 +383,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { CreateStackStoreLoad(InOp, OutVT)); } -// Helper for BSWAP/BITREVERSE promotion to ensure we can fit the shift amount +// Helper for BSWAP/BITREVERSE promotion to ensure we can fit any shift amount // in the VT returned by getShiftAmountTy and to return a safe VT if we can't. -static EVT getShiftAmountTyForConstant(unsigned Val, EVT VT, - const TargetLowering &TLI, +static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI, SelectionDAG &DAG) { EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - // If the value won't fit in the prefered type, just use something safe. It - // will be legalized when the shift is expanded. - if ((Log2_32(Val) + 1) > ShiftVT.getScalarSizeInBits()) + // If any possible shift value won't fit in the prefered type, just use + // something safe. It will be legalized when the shift is expanded. + if (!ShiftVT.isVector() && + ShiftVT.getSizeInBits() < Log2_32_Ceil(VT.getSizeInBits())) ShiftVT = MVT::i32; return ShiftVT; } @@ -385,7 +403,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDLoc dl(N); unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG); + EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), DAG.getConstant(DiffBits, dl, ShiftVT)); } @@ -397,7 +415,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { SDLoc dl(N); unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG); + EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), DAG.getConstant(DiffBits, dl, ShiftVT)); @@ -592,8 +610,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { SDLoc dl(N); SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), - N->getMask(), ExtPassThru, N->getMemoryVT(), - N->getMemOperand(), ISD::EXTLOAD); + N->getOffset(), N->getMask(), ExtPassThru, + N->getMemoryVT(), N->getMemOperand(), + N->getAddressingMode(), ISD::EXTLOAD); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -604,7 +623,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue ExtPassThru = GetPromotedInteger(N->getPassThru()); assert(NVT == ExtPassThru.getValueType() && - "Gather result type and the passThru agrument type should be the same"); + "Gather result type and the passThru argument type should be the same"); SDLoc dl(N); SDValue Ops[] = {N->getChain(), ExtPassThru, N->getMask(), N->getBasePtr(), @@ -762,6 +781,71 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { N->getOperand(2)); } +static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS, + unsigned Scale, const TargetLowering &TLI, + SelectionDAG &DAG) { + EVT VT = LHS.getValueType(); + bool Signed = N->getOpcode() == ISD::SDIVFIX; + + SDLoc dl(N); + // See if we can perform the division in this type without widening. + if (SDValue V = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale, + DAG)) + return V; + + // If that didn't work, double the type width and try again. That must work, + // or something is wrong. + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), + VT.getScalarSizeInBits() * 2); + if (Signed) { + LHS = DAG.getSExtOrTrunc(LHS, dl, WideVT); + RHS = DAG.getSExtOrTrunc(RHS, dl, WideVT); + } else { + LHS = DAG.getZExtOrTrunc(LHS, dl, WideVT); + RHS = DAG.getZExtOrTrunc(RHS, dl, WideVT); + } + + // TODO: Saturation. + + SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale, + DAG); + assert(Res && "Expanding DIVFIX with wide type failed?"); + return DAG.getZExtOrTrunc(Res, dl, VT); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) { + SDLoc dl(N); + SDValue Op1Promoted, Op2Promoted; + bool Signed = N->getOpcode() == ISD::SDIVFIX; + if (Signed) { + Op1Promoted = SExtPromotedInteger(N->getOperand(0)); + Op2Promoted = SExtPromotedInteger(N->getOperand(1)); + } else { + Op1Promoted = ZExtPromotedInteger(N->getOperand(0)); + Op2Promoted = ZExtPromotedInteger(N->getOperand(1)); + } + EVT PromotedType = Op1Promoted.getValueType(); + unsigned Scale = N->getConstantOperandVal(2); + + SDValue Res; + // If the type is already legal and the operation is legal in that type, we + // should not early expand. + if (TLI.isTypeLegal(PromotedType)) { + TargetLowering::LegalizeAction Action = + TLI.getFixedPointOperationAction(N->getOpcode(), PromotedType, Scale); + if (Action == TargetLowering::Legal || Action == TargetLowering::Custom) + Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, + Op2Promoted, N->getOperand(2)); + } + + if (!Res) + Res = earlyExpandDIVFIX(N, Op1Promoted, Op2Promoted, Scale, TLI, DAG); + + // TODO: Saturation. + + return Res; +} + SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { if (ResNo == 1) return PromoteIntRes_Overflow(N); @@ -816,7 +900,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { - EVT InVT = N->getOperand(0).getValueType(); + unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0; + EVT InVT = N->getOperand(OpNo).getValueType(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); EVT SVT = getSetCCResultType(InVT); @@ -835,12 +920,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { } SDLoc dl(N); - assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() && + assert(SVT.isVector() == N->getOperand(OpNo).getValueType().isVector() && "Vector compare must return a vector result!"); // Get the SETCC result using the canonical SETCC type. - SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0), - N->getOperand(1), N->getOperand(2)); + SDValue SetCC; + if (N->isStrictFPOpcode()) { + EVT VTs[] = {SVT, MVT::Other}; + SDValue Opers[] = {N->getOperand(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3)}; + SetCC = DAG.getNode(N->getOpcode(), dl, VTs, Opers); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), SetCC.getValue(1)); + } else + SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); // Convert to the expected type. return DAG.getSExtOrTrunc(SetCC, dl, NVT); @@ -1058,8 +1153,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { if (N->getOpcode() == ISD::UMULO) { // Unsigned overflow occurred if the high part is non-zero. unsigned Shift = SmallVT.getScalarSizeInBits(); - EVT ShiftTy = getShiftAmountTyForConstant(Shift, Mul.getValueType(), - TLI, DAG); + EVT ShiftTy = getShiftAmountTyForConstant(Mul.getValueType(), TLI, DAG); SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, DAG.getConstant(Shift, DL, ShiftTy)); Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, @@ -1176,6 +1270,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break; case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break; case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break; + case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break; case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N), @@ -1189,6 +1284,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; case ISD::FP16_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; + case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break; case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break; @@ -1209,7 +1305,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: - case ISD::UMULFIXSAT: Res = PromoteIntOp_MULFIX(N); break; + case ISD::UMULFIXSAT: + case ISD::SDIVFIX: + case ISD::UDIVFIX: Res = PromoteIntOp_FIX(N); break; case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break; @@ -1465,6 +1563,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { SExtPromotedInteger(N->getOperand(0))), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N) { + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + SExtPromotedInteger(N->getOperand(1))), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); SDValue Ch = N->getChain(), Ptr = N->getBasePtr(); @@ -1486,11 +1589,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, SDLoc dl(N); bool TruncateStore = false; - if (OpNo == 3) { + if (OpNo == 4) { Mask = PromoteTargetBoolean(Mask, DataVT); // Update in place. SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); - NewOps[3] = Mask; + NewOps[4] = Mask; return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } else { // Data operand assert(OpNo == 1 && "Unexpected operand for promotion"); @@ -1498,14 +1601,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, TruncateStore = true; } - return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask, - N->getMemoryVT(), N->getMemOperand(), + return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), + N->getOffset(), Mask, N->getMemoryVT(), + N->getMemOperand(), N->getAddressingMode(), TruncateStore, N->isCompressingStore()); } SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo) { - assert(OpNo == 2 && "Only know how to promote the mask!"); + assert(OpNo == 3 && "Only know how to promote the mask!"); EVT DataVT = N->getValueType(0); SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); @@ -1563,6 +1667,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { ZExtPromotedInteger(N->getOperand(0))), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N) { + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + ZExtPromotedInteger(N->getOperand(1))), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { SDLoc dl(N); SDValue Op = GetPromotedInteger(N->getOperand(0)); @@ -1584,7 +1693,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) { return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0); } -SDValue DAGTypeLegalizer::PromoteIntOp_MULFIX(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntOp_FIX(SDNode *N) { SDValue Op2 = ZExtPromotedInteger(N->getOperand(2)); return SDValue( DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Op2), 0); @@ -1697,10 +1806,14 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break; + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; - case ISD::LLROUND: ExpandIntRes_LLROUND(N, Lo, Hi); break; - case ISD::LLRINT: ExpandIntRes_LLRINT(N, Lo, Hi); break; + case ISD::STRICT_LLROUND: + case ISD::STRICT_LLRINT: + case ISD::LLROUND: + case ISD::LLRINT: ExpandIntRes_LLROUND_LLRINT(N, Lo, Hi); break; case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break; case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break; @@ -1794,6 +1907,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UMULFIX: case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break; + case ISD::SDIVFIX: + case ISD::UDIVFIX: ExpandIntRes_DIVFIX(N, Lo, Hi); break; + case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: case ISD::VECREDUCE_AND: @@ -1817,7 +1933,11 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); - return ExpandChainLibCall(LC, Node, false); + EVT RetVT = Node->getValueType(0); + SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end()); + TargetLowering::MakeLibCallOptions CallOptions; + return TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, SDLoc(Node), + Node->getOperand(0)); } /// N is a shift by a value that needs to be expanded, @@ -2304,11 +2424,27 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, SDValue Ovf; - bool HasOpCarry = TLI.isOperationLegalOrCustom( - N->getOpcode() == ISD::ADD ? ISD::ADDCARRY : ISD::SUBCARRY, - TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType())); + unsigned CarryOp, NoCarryOp; + ISD::CondCode Cond; + switch(N->getOpcode()) { + case ISD::UADDO: + CarryOp = ISD::ADDCARRY; + NoCarryOp = ISD::ADD; + Cond = ISD::SETULT; + break; + case ISD::USUBO: + CarryOp = ISD::SUBCARRY; + NoCarryOp = ISD::SUB; + Cond = ISD::SETUGT; + break; + default: + llvm_unreachable("Node has unexpected Opcode"); + } - if (HasOpCarry) { + bool HasCarryOp = TLI.isOperationLegalOrCustom( + CarryOp, TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType())); + + if (HasCarryOp) { // Expand the subcomponents. SDValue LHSL, LHSH, RHSL, RHSH; GetExpandedInteger(LHS, LHSL, LHSH); @@ -2317,22 +2453,19 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, SDValue LoOps[2] = { LHSL, RHSL }; SDValue HiOps[3] = { LHSH, RHSH }; - unsigned Opc = N->getOpcode() == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY; Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(Opc, dl, VTList, HiOps); + Hi = DAG.getNode(CarryOp, dl, VTList, HiOps); Ovf = Hi.getValue(1); } else { // Expand the result by simply replacing it with the equivalent // non-overflow-checking operation. - auto Opc = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB; - SDValue Sum = DAG.getNode(Opc, dl, LHS.getValueType(), LHS, RHS); + SDValue Sum = DAG.getNode(NoCarryOp, dl, LHS.getValueType(), LHS, RHS); SplitInteger(Sum, Lo, Hi); // Calculate the overflow: addition overflows iff a + b < a, and subtraction // overflows iff a - b > a. - auto Cond = N->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT; Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond); } @@ -2544,7 +2677,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT VT = N->getValueType(0); - SDValue Op = N->getOperand(0); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) Op = GetPromotedFloat(Op); @@ -2552,8 +2687,12 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, - Lo, Hi); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op, + CallOptions, dl, Chain); + SplitInteger(Tmp.first, Lo, Hi); + + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); } void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, @@ -2561,75 +2700,94 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT VT = N->getValueType(0); - SDValue Op = N->getOperand(0); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) Op = GetPromotedFloat(Op); RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); TargetLowering::MakeLibCallOptions CallOptions; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, - Lo, Hi); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op, + CallOptions, dl, Chain); + SplitInteger(Tmp.first, Lo, Hi); + + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); } -void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo, - SDValue &Hi) { - RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; - if (VT == MVT::f32) - LC = RTLIB::LLROUND_F32; - else if (VT == MVT::f64) - LC = RTLIB::LLROUND_F64; - else if (VT == MVT::f80) - LC = RTLIB::LLROUND_F80; - else if (VT == MVT::f128) - LC = RTLIB::LLROUND_F128; - else if (VT == MVT::ppcf128) - LC = RTLIB::LLROUND_PPCF128; - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!"); +void DAGTypeLegalizer::ExpandIntRes_LLROUND_LLRINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Op = N->getOperand(N->isStrictFPOpcode() ? 1 : 0); - SDValue Op = N->getOperand(0); - if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) - Op = GetPromotedFloat(Op); + assert(getTypeAction(Op.getValueType()) != TargetLowering::TypePromoteFloat && + "Input type needs to be promoted!"); - SDLoc dl(N); - EVT RetVT = N->getValueType(0); - TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(true); - SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first, - Lo, Hi); -} + EVT VT = Op.getValueType(); -void DAGTypeLegalizer::ExpandIntRes_LLRINT(SDNode *N, SDValue &Lo, - SDValue &Hi) { RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; - if (VT == MVT::f32) - LC = RTLIB::LLRINT_F32; - else if (VT == MVT::f64) - LC = RTLIB::LLRINT_F64; - else if (VT == MVT::f80) - LC = RTLIB::LLRINT_F80; - else if (VT == MVT::f128) - LC = RTLIB::LLRINT_F128; - else if (VT == MVT::ppcf128) - LC = RTLIB::LLRINT_PPCF128; - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!"); - - SDValue Op = N->getOperand(0); - if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) - Op = GetPromotedFloat(Op); + if (N->getOpcode() == ISD::LLROUND || + N->getOpcode() == ISD::STRICT_LLROUND) { + if (VT == MVT::f32) + LC = RTLIB::LLROUND_F32; + else if (VT == MVT::f64) + LC = RTLIB::LLROUND_F64; + else if (VT == MVT::f80) + LC = RTLIB::LLROUND_F80; + else if (VT == MVT::f128) + LC = RTLIB::LLROUND_F128; + else if (VT == MVT::ppcf128) + LC = RTLIB::LLROUND_PPCF128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!"); + } else if (N->getOpcode() == ISD::LLRINT || + N->getOpcode() == ISD::STRICT_LLRINT) { + if (VT == MVT::f32) + LC = RTLIB::LLRINT_F32; + else if (VT == MVT::f64) + LC = RTLIB::LLRINT_F64; + else if (VT == MVT::f80) + LC = RTLIB::LLRINT_F80; + else if (VT == MVT::f128) + LC = RTLIB::LLRINT_F128; + else if (VT == MVT::ppcf128) + LC = RTLIB::LLRINT_PPCF128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!"); + } else + llvm_unreachable("Unexpected opcode!"); SDLoc dl(N); EVT RetVT = N->getValueType(0); + SDValue Chain = N->isStrictFPOpcode() ? N->getOperand(0) : SDValue(); + TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first, - Lo, Hi); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT, + Op, CallOptions, dl, + Chain); + SplitInteger(Tmp.first, Lo, Hi); + + if (N->isStrictFPOpcode()) + ReplaceValueWith(SDValue(N, 1), Tmp.second); } void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi) { + if (N->isAtomic()) { + // It's typical to have larger CAS than atomic load instructions. + SDLoc dl(N); + EVT VT = N->getMemoryVT(); + SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue Swap = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, + VT, VTs, N->getOperand(0), + N->getOperand(1), Zero, Zero, N->getMemOperand()); + ReplaceValueWith(SDValue(N, 0), Swap.getValue(0)); + ReplaceValueWith(SDValue(N, 1), Swap.getValue(2)); + return; + } + if (ISD::isNormalLoad(N)) { ExpandRes_NormalLoad(N, Lo, Hi); return; @@ -2684,8 +2842,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); @@ -2709,8 +2866,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Alignment, MMOFlags, AAInfo); // Increment the pointer to the other half. - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); // Load the rest of the low bits. Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), @@ -3068,6 +3224,13 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo); } +void DAGTypeLegalizer::ExpandIntRes_DIVFIX(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Res = earlyExpandDIVFIX(N, N->getOperand(0), N->getOperand(1), + N->getConstantOperandVal(2), TLI, DAG); + SplitInteger(Res, Lo, Hi); +} + void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, SDValue &Lo, SDValue &Hi) { SDValue LHS = Node->getOperand(0); @@ -3596,9 +3759,11 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break; + case ISD::STRICT_SINT_TO_FP: case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break; + case ISD::STRICT_UINT_TO_FP: case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break; case ISD::SHL: @@ -3865,17 +4030,37 @@ SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) { } SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { - SDValue Op = N->getOperand(0); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); EVT DstVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N)).first; + std::pair<SDValue, SDValue> Tmp = + TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain); + + if (!IsStrict) + return Tmp.first; + + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { + if (N->isAtomic()) { + // It's typical to have larger CAS than atomic store instructions. + SDLoc dl(N); + SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, + N->getMemoryVT(), + N->getOperand(0), N->getOperand(2), + N->getOperand(1), + N->getMemOperand()); + return Swap.getValue(1); + } if (ISD::isNormalStore(N)) return ExpandOp_NormalStore(N, OpNo); @@ -3965,81 +4150,24 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { } SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { - SDValue Op = N->getOperand(0); - EVT SrcVT = Op.getValueType(); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); EVT DstVT = N->getValueType(0); - SDLoc dl(N); - - // The following optimization is valid only if every value in SrcVT (when - // treated as signed) is representable in DstVT. Check that the mantissa - // size of DstVT is >= than the number of bits in SrcVT -1. - const fltSemantics &sem = DAG.EVTToAPFloatSemantics(DstVT); - if (APFloat::semanticsPrecision(sem) >= SrcVT.getSizeInBits()-1 && - TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){ - // Do a signed conversion then adjust the result. - SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op); - SignedConv = TLI.LowerOperation(SignedConv, DAG); - - // The result of the signed conversion needs adjusting if the 'sign bit' of - // the incoming integer was set. To handle this, we dynamically test to see - // if it is set, and, if so, add a fudge factor. - - const uint64_t F32TwoE32 = 0x4F800000ULL; - const uint64_t F32TwoE64 = 0x5F800000ULL; - const uint64_t F32TwoE128 = 0x7F800000ULL; - - APInt FF(32, 0); - if (SrcVT == MVT::i32) - FF = APInt(32, F32TwoE32); - else if (SrcVT == MVT::i64) - FF = APInt(32, F32TwoE64); - else if (SrcVT == MVT::i128) - FF = APInt(32, F32TwoE128); - else - llvm_unreachable("Unsupported UINT_TO_FP!"); - - // Check whether the sign bit is set. - SDValue Lo, Hi; - GetExpandedInteger(Op, Lo, Hi); - SDValue SignSet = DAG.getSetCC(dl, - getSetCCResultType(Hi.getValueType()), - Hi, - DAG.getConstant(0, dl, Hi.getValueType()), - ISD::SETLT); - - // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. - SDValue FudgePtr = - DAG.getConstantPool(ConstantInt::get(*DAG.getContext(), FF.zext(64)), - TLI.getPointerTy(DAG.getDataLayout())); - - // Get a pointer to FF if the sign bit was set, or to 0 otherwise. - SDValue Zero = DAG.getIntPtrConstant(0, dl); - SDValue Four = DAG.getIntPtrConstant(4, dl); - if (DAG.getDataLayout().isBigEndian()) - std::swap(Zero, Four); - SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, - Zero, Four); - unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment(); - FudgePtr = DAG.getNode(ISD::ADD, dl, FudgePtr.getValueType(), - FudgePtr, Offset); - Alignment = std::min(Alignment, 4u); - - // Load the value out, extending it from f32 to the destination float type. - // FIXME: Avoid the extend by constructing the right constant pool? - SDValue Fudge = DAG.getExtLoad( - ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, - Alignment); - return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); - } - - // Otherwise, use a libcall. - RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); + RTLIB::Libcall LC = RTLIB::getUINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this UINT_TO_FP!"); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, dl).first; + std::pair<SDValue, SDValue> Tmp = + TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain); + + if (!IsStrict) + return Tmp.first; + + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); } SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index b596c174a287..63ddb59fce68 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -204,7 +204,8 @@ bool DAGTypeLegalizer::run() { // non-leaves. for (SDNode &Node : DAG.allnodes()) { if (Node.getNumOperands() == 0) { - AddToWorklist(&Node); + Node.setNodeId(ReadyToProcess); + Worklist.push_back(&Node); } else { Node.setNodeId(Unanalyzed); } @@ -974,68 +975,6 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); } -/// Convert the node into a libcall with the same prototype. -SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, - bool isSigned) { - TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(isSigned); - unsigned NumOps = N->getNumOperands(); - SDLoc dl(N); - if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, CallOptions, - dl).first; - } else if (NumOps == 1) { - SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, CallOptions, - dl).first; - } else if (NumOps == 2) { - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, - dl).first; - } - SmallVector<SDValue, 8> Ops(NumOps); - for (unsigned i = 0; i < NumOps; ++i) - Ops[i] = N->getOperand(i); - - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first; -} - -/// Expand a node into a call to a libcall. Similar to ExpandLibCall except that -/// the first operand is the in-chain. -std::pair<SDValue, SDValue> -DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, - bool isSigned) { - SDValue InChain = Node->getOperand(0); - - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { - EVT ArgVT = Node->getOperand(i).getValueType(); - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - Entry.Node = Node->getOperand(i); - Entry.Ty = ArgTy; - Entry.IsSExt = isSigned; - Entry.IsZExt = !isSigned; - Args.push_back(Entry); - } - SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy(DAG.getDataLayout())); - - Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(SDLoc(Node)) - .setChain(InChain) - .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, - std::move(Args)) - .setSExtResult(isSigned) - .setZExtResult(!isSigned); - - std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); - - return CallInfo; -} - /// Promote the given target boolean to a target boolean of the given type. /// A target boolean is an integer value, not necessarily of type i1, the bits /// of which conform to getBooleanContents. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 4afbae69128a..faae14444d51 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -215,10 +215,7 @@ private: SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo); SDValue JoinIntegers(SDValue Lo, SDValue Hi); - SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); - std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, - SDNode *Node, bool isSigned); std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT); @@ -228,11 +225,6 @@ private: void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, SDValue &Lo, SDValue &Hi); - void AddToWorklist(SDNode *N) { - N->setNodeId(ReadyToProcess); - Worklist.push_back(N); - } - //===--------------------------------------------------------------------===// // Integer Promotion Support: LegalizeIntegerTypes.cpp //===--------------------------------------------------------------------===// @@ -337,6 +329,7 @@ private: SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_ADDSUBSAT(SDNode *N); SDValue PromoteIntRes_MULFIX(SDNode *N); + SDValue PromoteIntRes_DIVFIX(SDNode *N); SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N); SDValue PromoteIntRes_VECREDUCE(SDNode *N); SDValue PromoteIntRes_ABS(SDNode *N); @@ -362,9 +355,11 @@ private: SDValue PromoteIntOp_Shift(SDNode *N); SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N); SDValue PromoteIntOp_SINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N); SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue PromoteIntOp_TRUNCATE(SDNode *N); SDValue PromoteIntOp_UINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N); SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo); @@ -373,7 +368,7 @@ private: SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N); SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo); - SDValue PromoteIntOp_MULFIX(SDNode *N); + SDValue PromoteIntOp_FIX(SDNode *N); SDValue PromoteIntOp_FPOWI(SDNode *N); SDValue PromoteIntOp_VECREDUCE(SDNode *N); @@ -411,8 +406,7 @@ private: void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandIntRes_LLROUND (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandIntRes_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_LLROUND_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -435,6 +429,7 @@ private: void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MULFIX (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_DIVFIX (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_VECREDUCE (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -486,6 +481,8 @@ private: // Convert Float Results to Integer. void SoftenFloatResult(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC); + SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_BITCAST(SDNode *N); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); @@ -495,6 +492,7 @@ private: SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); + SDValue SoftenFloatRes_FCBRT(SDNode *N); SDValue SoftenFloatRes_FCEIL(SDNode *N); SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); SDValue SoftenFloatRes_FCOS(SDNode *N); @@ -530,9 +528,9 @@ private: // Convert Float Operand to Integer. bool SoftenFloatOperand(SDNode *N, unsigned OpNo); + SDValue SoftenFloatOp_Unary(SDNode *N, RTLIB::Libcall LC); SDValue SoftenFloatOp_BITCAST(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); - SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); SDValue SoftenFloatOp_LROUND(SDNode *N); @@ -542,6 +540,7 @@ private: SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); + SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N); //===--------------------------------------------------------------------===// // Float Expansion Support: LegalizeFloatTypes.cpp @@ -559,10 +558,15 @@ private: // Float Result Expansion. void ExpandFloatResult(SDNode *N, unsigned ResNo); void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_Unary(SDNode *N, RTLIB::Libcall LC, + SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC, + SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCBRT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -676,7 +680,6 @@ private: SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N); SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); SDValue ScalarizeVecRes_FP_ROUND(SDNode *N); - SDValue ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N); SDValue ScalarizeVecRes_FPOWI(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); @@ -688,7 +691,7 @@ private: SDValue ScalarizeVecRes_UNDEF(SDNode *N); SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); - SDValue ScalarizeVecRes_MULFIX(SDNode *N); + SDValue ScalarizeVecRes_FIX(SDNode *N); // Vector Operand Scalarization: <1 x ty> -> ty. bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); @@ -730,7 +733,7 @@ private: void SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi); - void SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -804,6 +807,7 @@ private: SDValue WidenVSELECTAndMask(SDNode *N); SDValue WidenVecRes_SELECT_CC(SDNode* N); SDValue WidenVecRes_SETCC(SDNode* N); + SDValue WidenVecRes_STRICT_FSETCC(SDNode* N); SDValue WidenVecRes_UNDEF(SDNode *N); SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); @@ -833,6 +837,7 @@ private: SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_SETCC(SDNode* N); + SDValue WidenVecOp_STRICT_FSETCC(SDNode* N); SDValue WidenVecOp_VSELECT(SDNode *N); SDValue WidenVecOp_Convert(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 5562f400b6e1..c45c62cabc05 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -169,9 +169,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getConstant(IncrementSize, dl, - StackPtr.getValueType())); + StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl); // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, @@ -248,6 +246,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, SDLoc dl(N); LoadSDNode *LD = cast<LoadSDNode>(N); + assert(!LD->isAtomic() && "Atomics can not be split"); EVT ValueVT = LD->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = LD->getChain(); @@ -262,8 +261,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), MinAlign(Alignment, IncrementSize), @@ -459,6 +457,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { SDLoc dl(N); StoreSDNode *St = cast<StoreSDNode>(N); + assert(!St->isAtomic() && "Atomics can not be split"); EVT ValueVT = St->getValue().getValueType(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = St->getChain(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 15c3a0b6cfad..7d0b1ee6ae07 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -75,95 +75,95 @@ class VectorLegalizer { SDValue LegalizeOp(SDValue Op); /// Assuming the node is legal, "legalize" the results. - SDValue TranslateLegalizeResults(SDValue Op, SDValue Result); + SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result); + + /// Make sure Results are legal and update the translation cache. + SDValue RecursivelyLegalizeResults(SDValue Op, + MutableArrayRef<SDValue> Results); + + /// Wrapper to interface LowerOperation with a vector of Results. + /// Returns false if the target wants to use default expansion. Otherwise + /// returns true. If return is true and the Results are empty, then the + /// target wants to keep the input node as is. + bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results); /// Implements unrolling a VSETCC. - SDValue UnrollVSETCC(SDValue Op); + SDValue UnrollVSETCC(SDNode *Node); /// Implement expand-based legalization of vector operations. /// /// This is just a high-level routine to dispatch to specific code paths for /// operations to legalize them. - SDValue Expand(SDValue Op); + void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results); /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if /// FP_TO_SINT isn't legal. - SDValue ExpandFP_TO_UINT(SDValue Op); + void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results); /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if /// SINT_TO_FLOAT and SHR on vectors isn't legal. - SDValue ExpandUINT_TO_FLOAT(SDValue Op); + void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results); /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. - SDValue ExpandSEXTINREG(SDValue Op); + SDValue ExpandSEXTINREG(SDNode *Node); /// Implement expansion for ANY_EXTEND_VECTOR_INREG. /// /// Shuffles the low lanes of the operand into place and bitcasts to the proper /// type. The contents of the bits in the extended part of each element are /// undef. - SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op); + SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node); /// Implement expansion for SIGN_EXTEND_VECTOR_INREG. /// /// Shuffles the low lanes of the operand into place, bitcasts to the proper /// type, then shifts left and arithmetic shifts right to introduce a sign /// extension. - SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op); + SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node); /// Implement expansion for ZERO_EXTEND_VECTOR_INREG. /// /// Shuffles the low lanes of the operand into place and blends zeros into /// the remaining lanes, finally bitcasting to the proper type. - SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op); - - /// Implement expand-based legalization of ABS vector operations. - /// If following expanding is legal/custom then do it: - /// (ABS x) --> (XOR (ADD x, (SRA x, sizeof(x)-1)), (SRA x, sizeof(x)-1)) - /// else unroll the operation. - SDValue ExpandABS(SDValue Op); + SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node); /// Expand bswap of vectors into a shuffle if legal. - SDValue ExpandBSWAP(SDValue Op); + SDValue ExpandBSWAP(SDNode *Node); /// Implement vselect in terms of XOR, AND, OR when blend is not /// supported by the target. - SDValue ExpandVSELECT(SDValue Op); - SDValue ExpandSELECT(SDValue Op); - SDValue ExpandLoad(SDValue Op); - SDValue ExpandStore(SDValue Op); - SDValue ExpandFNEG(SDValue Op); - SDValue ExpandFSUB(SDValue Op); - SDValue ExpandBITREVERSE(SDValue Op); - SDValue ExpandCTPOP(SDValue Op); - SDValue ExpandCTLZ(SDValue Op); - SDValue ExpandCTTZ(SDValue Op); - SDValue ExpandFunnelShift(SDValue Op); - SDValue ExpandROT(SDValue Op); - SDValue ExpandFMINNUM_FMAXNUM(SDValue Op); - SDValue ExpandUADDSUBO(SDValue Op); - SDValue ExpandSADDSUBO(SDValue Op); - SDValue ExpandMULO(SDValue Op); - SDValue ExpandAddSubSat(SDValue Op); - SDValue ExpandFixedPointMul(SDValue Op); - SDValue ExpandStrictFPOp(SDValue Op); + SDValue ExpandVSELECT(SDNode *Node); + SDValue ExpandSELECT(SDNode *Node); + std::pair<SDValue, SDValue> ExpandLoad(SDNode *N); + SDValue ExpandStore(SDNode *N); + SDValue ExpandFNEG(SDNode *Node); + void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results); + SDValue ExpandFixedPointDiv(SDNode *Node); + SDValue ExpandStrictFPOp(SDNode *Node); + void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); + + void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); /// Implements vector promotion. /// /// This is essentially just bitcasting the operands to a different type and /// bitcasting the result back to the original type. - SDValue Promote(SDValue Op); + void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results); /// Implements [SU]INT_TO_FP vector promotion. /// /// This is a [zs]ext of the input operand to a larger integer type. - SDValue PromoteINT_TO_FP(SDValue Op); + void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results); /// Implements FP_TO_[SU]INT vector promotion of the result type. /// /// It is promoted to a larger integer type. The result is then /// truncated back to the original type. - SDValue PromoteFP_TO_INT(SDValue Op); + void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results); public: VectorLegalizer(SelectionDAG& dag) : @@ -219,11 +219,27 @@ bool VectorLegalizer::Run() { return Changed; } -SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) { +SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) { + assert(Op->getNumValues() == Result->getNumValues() && + "Unexpected number of results"); // Generic legalization: just pass the operand through. - for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i) - AddLegalizedOperand(Op.getValue(i), Result.getValue(i)); - return Result.getValue(Op.getResNo()); + for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i) + AddLegalizedOperand(Op.getValue(i), SDValue(Result, i)); + return SDValue(Result, Op.getResNo()); +} + +SDValue +VectorLegalizer::RecursivelyLegalizeResults(SDValue Op, + MutableArrayRef<SDValue> Results) { + assert(Results.size() == Op->getNumValues() && + "Unexpected number of results"); + // Make sure that the generated code is itself legal. + for (unsigned i = 0, e = Results.size(); i != e; ++i) { + Results[i] = LegalizeOp(Results[i]); + AddLegalizedOperand(Op.getValue(i), Results[i]); + } + + return Results[Op.getResNo()]; } SDValue VectorLegalizer::LegalizeOp(SDValue Op) { @@ -232,18 +248,15 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); if (I != LegalizedNodes.end()) return I->second; - SDNode* Node = Op.getNode(); - // Legalize the operands SmallVector<SDValue, 8> Ops; - for (const SDValue &Op : Node->op_values()) - Ops.push_back(LegalizeOp(Op)); + for (const SDValue &Oper : Op->op_values()) + Ops.push_back(LegalizeOp(Oper)); - SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), - Op.getResNo()); + SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops); if (Op.getOpcode() == ISD::LOAD) { - LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); + LoadSDNode *LD = cast<LoadSDNode>(Node); ISD::LoadExtType ExtType = LD->getExtensionType(); if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: "; @@ -252,26 +265,29 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { LD->getMemoryVT())) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: - return TranslateLegalizeResults(Op, Result); - case TargetLowering::Custom: - if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) { - assert(Lowered->getNumValues() == Op->getNumValues() && - "Unexpected number of results"); - if (Lowered != Result) { - // Make sure the new code is also legal. - Lowered = LegalizeOp(Lowered); - Changed = true; - } - return TranslateLegalizeResults(Op, Lowered); + return TranslateLegalizeResults(Op, Node); + case TargetLowering::Custom: { + SmallVector<SDValue, 2> ResultVals; + if (LowerOperationWrapper(Node, ResultVals)) { + if (ResultVals.empty()) + return TranslateLegalizeResults(Op, Node); + + Changed = true; + return RecursivelyLegalizeResults(Op, ResultVals); } LLVM_FALLTHROUGH; - case TargetLowering::Expand: + } + case TargetLowering::Expand: { Changed = true; - return ExpandLoad(Op); + std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node); + AddLegalizedOperand(Op.getValue(0), Tmp.first); + AddLegalizedOperand(Op.getValue(1), Tmp.second); + return Op.getResNo() ? Tmp.first : Tmp.second; + } } } } else if (Op.getOpcode() == ISD::STORE) { - StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); + StoreSDNode *ST = cast<StoreSDNode>(Node); EVT StVT = ST->getMemoryVT(); MVT ValVT = ST->getValue().getSimpleValueType(); if (StVT.isVector() && ST->isTruncatingStore()) { @@ -280,19 +296,24 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { switch (TLI.getTruncStoreAction(ValVT, StVT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: - return TranslateLegalizeResults(Op, Result); + return TranslateLegalizeResults(Op, Node); case TargetLowering::Custom: { - SDValue Lowered = TLI.LowerOperation(Result, DAG); - if (Lowered != Result) { - // Make sure the new code is also legal. - Lowered = LegalizeOp(Lowered); + SmallVector<SDValue, 1> ResultVals; + if (LowerOperationWrapper(Node, ResultVals)) { + if (ResultVals.empty()) + return TranslateLegalizeResults(Op, Node); + Changed = true; + return RecursivelyLegalizeResults(Op, ResultVals); } - return TranslateLegalizeResults(Op, Lowered); + LLVM_FALLTHROUGH; } - case TargetLowering::Expand: + case TargetLowering::Expand: { Changed = true; - return ExpandStore(Op); + SDValue Chain = ExpandStore(Node); + AddLegalizedOperand(Op, Chain); + return Chain; + } } } } @@ -300,55 +321,41 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { bool HasVectorValueOrOp = false; for (auto J = Node->value_begin(), E = Node->value_end(); J != E; ++J) HasVectorValueOrOp |= J->isVector(); - for (const SDValue &Op : Node->op_values()) - HasVectorValueOrOp |= Op.getValueType().isVector(); + for (const SDValue &Oper : Node->op_values()) + HasVectorValueOrOp |= Oper.getValueType().isVector(); if (!HasVectorValueOrOp) - return TranslateLegalizeResults(Op, Result); + return TranslateLegalizeResults(Op, Node); TargetLowering::LegalizeAction Action = TargetLowering::Legal; + EVT ValVT; switch (Op.getOpcode()) { default: - return TranslateLegalizeResults(Op, Result); - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: - case ISD::STRICT_FP_TO_SINT: - case ISD::STRICT_FP_TO_UINT: - case ISD::STRICT_FP_ROUND: - case ISD::STRICT_FP_EXTEND: + return TranslateLegalizeResults(Op, Node); + case ISD::MERGE_VALUES: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + // This operation lies about being legal: when it claims to be legal, + // it should actually be expanded. + if (Action == TargetLowering::Legal) + Action = TargetLowering::Expand; + break; +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: +#include "llvm/IR/ConstrainedOps.def" + ValVT = Node->getValueType(0); + if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP || + Op.getOpcode() == ISD::STRICT_UINT_TO_FP) + ValVT = Node->getOperand(1).getValueType(); + Action = TLI.getOperationAction(Node->getOpcode(), ValVT); // If we're asked to expand a strict vector floating-point operation, // by default we're going to simply unroll it. That is usually the // best approach, except in the case where the resulting strict (scalar) // operations would themselves use the fallback mutation to non-strict. // In that specific case, just do the fallback on the vector op. - if (Action == TargetLowering::Expand && - TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getValueType(0)) - == TargetLowering::Legal) { - EVT EltVT = Node->getValueType(0).getVectorElementType(); + if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() && + TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) == + TargetLowering::Legal) { + EVT EltVT = ValVT.getVectorElementType(); if (TLI.getOperationAction(Node->getOpcode(), EltVT) == TargetLowering::Expand && TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT) @@ -454,7 +461,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: - case ISD::UMULFIXSAT: { + case ISD::UMULFIXSAT: + case ISD::SDIVFIX: + case ISD::UDIVFIX: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); @@ -482,53 +491,90 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); + SmallVector<SDValue, 8> ResultVals; switch (Action) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Promote: - Result = Promote(Op); - Changed = true; + LLVM_DEBUG(dbgs() << "Promoting\n"); + Promote(Node, ResultVals); + assert(!ResultVals.empty() && "No results for promotion?"); break; case TargetLowering::Legal: LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n"); break; - case TargetLowering::Custom: { + case TargetLowering::Custom: LLVM_DEBUG(dbgs() << "Trying custom legalization\n"); - if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) { - LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n"); - Result = Tmp1; + if (LowerOperationWrapper(Node, ResultVals)) break; - } LLVM_DEBUG(dbgs() << "Could not custom legalize node\n"); LLVM_FALLTHROUGH; - } case TargetLowering::Expand: - Result = Expand(Op); + LLVM_DEBUG(dbgs() << "Expanding\n"); + Expand(Node, ResultVals); + break; } - // Make sure that the generated code is itself legal. - if (Result != Op) { - Result = LegalizeOp(Result); - Changed = true; + if (ResultVals.empty()) + return TranslateLegalizeResults(Op, Node); + + Changed = true; + return RecursivelyLegalizeResults(Op, ResultVals); +} + +// FIME: This is very similar to the X86 override of +// TargetLowering::LowerOperationWrapper. Can we merge them somehow? +bool VectorLegalizer::LowerOperationWrapper(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + + if (!Res.getNode()) + return false; + + if (Res == SDValue(Node, 0)) + return true; + + // If the original node has one result, take the return value from + // LowerOperation as is. It might not be result number 0. + if (Node->getNumValues() == 1) { + Results.push_back(Res); + return true; } - // Note that LegalizeOp may be reentered even from single-use nodes, which - // means that we always must cache transformed nodes. - AddLegalizedOperand(Op, Result); - return Result; + // If the original node has multiple results, then the return node should + // have the same number of results. + assert((Node->getNumValues() == Res->getNumValues()) && + "Lowering returned the wrong number of results!"); + + // Places new result values base on N result number. + for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I) + Results.push_back(Res.getValue(I)); + + return true; } -SDValue VectorLegalizer::Promote(SDValue Op) { +void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) { // For a few operations there is a specific concept for promotion based on // the operand's type. - switch (Op.getOpcode()) { + switch (Node->getOpcode()) { case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: // "Promote" the operation by extending the operand. - return PromoteINT_TO_FP(Op); + PromoteINT_TO_FP(Node, Results); + return; case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_FP_TO_SINT: // Promote the operation by extending the operand. - return PromoteFP_TO_INT(Op); + PromoteFP_TO_INT(Node, Results); + return; + case ISD::FP_ROUND: + case ISD::FP_EXTEND: + // These operations are used to do promotion so they can't be promoted + // themselves. + llvm_unreachable("Don't know how to promote this operation!"); } // There are currently two cases of vector promotion: @@ -536,91 +582,128 @@ SDValue VectorLegalizer::Promote(SDValue Op) { // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64. // 2) Extending a vector of floats to a vector of the same number of larger // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. - MVT VT = Op.getSimpleValueType(); - assert(Op.getNode()->getNumValues() == 1 && + assert(Node->getNumValues() == 1 && "Can't promote a vector with multiple results!"); - MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); - SDLoc dl(Op); - SmallVector<SDValue, 4> Operands(Op.getNumOperands()); - - for (unsigned j = 0; j != Op.getNumOperands(); ++j) { - if (Op.getOperand(j).getValueType().isVector()) - if (Op.getOperand(j) + MVT VT = Node->getSimpleValueType(0); + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + SDLoc dl(Node); + SmallVector<SDValue, 4> Operands(Node->getNumOperands()); + + for (unsigned j = 0; j != Node->getNumOperands(); ++j) { + if (Node->getOperand(j).getValueType().isVector()) + if (Node->getOperand(j) .getValueType() .getVectorElementType() .isFloatingPoint() && NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()) - Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j)); + Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j)); else - Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); + Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j)); else - Operands[j] = Op.getOperand(j); + Operands[j] = Node->getOperand(j); } - Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags()); + SDValue Res = + DAG.getNode(Node->getOpcode(), dl, NVT, Operands, Node->getFlags()); + if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) - return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl)); + Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res, DAG.getIntPtrConstant(0, dl)); else - return DAG.getNode(ISD::BITCAST, dl, VT, Op); + Res = DAG.getNode(ISD::BITCAST, dl, VT, Res); + + Results.push_back(Res); } -SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { +void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { // INT_TO_FP operations may require the input operand be promoted even // when the type is otherwise legal. - MVT VT = Op.getOperand(0).getSimpleValueType(); - MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); + bool IsStrict = Node->isStrictFPOpcode(); + MVT VT = Node->getOperand(IsStrict ? 1 : 0).getSimpleValueType(); + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && "Vectors have different number of elements!"); - SDLoc dl(Op); - SmallVector<SDValue, 4> Operands(Op.getNumOperands()); + SDLoc dl(Node); + SmallVector<SDValue, 4> Operands(Node->getNumOperands()); - unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : - ISD::SIGN_EXTEND; - for (unsigned j = 0; j != Op.getNumOperands(); ++j) { - if (Op.getOperand(j).getValueType().isVector()) - Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j)); + unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP || + Node->getOpcode() == ISD::STRICT_UINT_TO_FP) + ? ISD::ZERO_EXTEND + : ISD::SIGN_EXTEND; + for (unsigned j = 0; j != Node->getNumOperands(); ++j) { + if (Node->getOperand(j).getValueType().isVector()) + Operands[j] = DAG.getNode(Opc, dl, NVT, Node->getOperand(j)); else - Operands[j] = Op.getOperand(j); + Operands[j] = Node->getOperand(j); + } + + if (IsStrict) { + SDValue Res = DAG.getNode(Node->getOpcode(), dl, + {Node->getValueType(0), MVT::Other}, Operands); + Results.push_back(Res); + Results.push_back(Res.getValue(1)); + return; } - return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands); + SDValue Res = + DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Operands); + Results.push_back(Res); } // For FP_TO_INT we promote the result type to a vector type with wider // elements and then truncate the result. This is different from the default // PromoteVector which uses bitcast to promote thus assumning that the // promoted vector type has the same overall size. -SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) { - MVT VT = Op.getSimpleValueType(); - MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); +void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + MVT VT = Node->getSimpleValueType(0); + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + bool IsStrict = Node->isStrictFPOpcode(); assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && "Vectors have different number of elements!"); - unsigned NewOpc = Op->getOpcode(); + unsigned NewOpc = Node->getOpcode(); // Change FP_TO_UINT to FP_TO_SINT if possible. // TODO: Should we only do this if FP_TO_UINT itself isn't legal? if (NewOpc == ISD::FP_TO_UINT && TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) NewOpc = ISD::FP_TO_SINT; - SDLoc dl(Op); - SDValue Promoted = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0)); + if (NewOpc == ISD::STRICT_FP_TO_UINT && + TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT)) + NewOpc = ISD::STRICT_FP_TO_SINT; + + SDLoc dl(Node); + SDValue Promoted, Chain; + if (IsStrict) { + Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); + Chain = Promoted.getValue(1); + } else + Promoted = DAG.getNode(NewOpc, dl, NVT, Node->getOperand(0)); // Assert that the converted value fits in the original type. If it doesn't // (eg: because the value being converted is too big), then the result of the // original operation was undefined anyway, so the assert is still correct. - Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext - : ISD::AssertSext, - dl, NVT, Promoted, + if (Node->getOpcode() == ISD::FP_TO_UINT || + Node->getOpcode() == ISD::STRICT_FP_TO_UINT) + NewOpc = ISD::AssertZext; + else + NewOpc = ISD::AssertSext; + + Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted, DAG.getValueType(VT.getScalarType())); - return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted); + Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted); + Results.push_back(Promoted); + if (IsStrict) + Results.push_back(Chain); } -SDValue VectorLegalizer::ExpandLoad(SDValue Op) { - LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); +std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) { + LoadSDNode *LD = cast<LoadSDNode>(N); EVT SrcVT = LD->getMemoryVT(); EVT SrcEltVT = SrcVT.getScalarType(); @@ -629,7 +712,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { SDValue NewChain; SDValue Value; if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { - SDLoc dl(Op); + SDLoc dl(N); SmallVector<SDValue, 8> Vals; SmallVector<SDValue, 8> LoadChains; @@ -741,130 +824,157 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { } NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); - Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals); + Value = DAG.getBuildVector(N->getValueType(0), dl, Vals); } else { - SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG); - // Skip past MERGE_VALUE node if known. - if (Scalarized->getOpcode() == ISD::MERGE_VALUES) { - NewChain = Scalarized.getOperand(1); - Value = Scalarized.getOperand(0); - } else { - NewChain = Scalarized.getValue(1); - Value = Scalarized.getValue(0); - } + std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG); } - AddLegalizedOperand(Op.getValue(0), Value); - AddLegalizedOperand(Op.getValue(1), NewChain); - - return (Op.getResNo() ? NewChain : Value); + return std::make_pair(Value, NewChain); } -SDValue VectorLegalizer::ExpandStore(SDValue Op) { - StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); +SDValue VectorLegalizer::ExpandStore(SDNode *N) { + StoreSDNode *ST = cast<StoreSDNode>(N); SDValue TF = TLI.scalarizeVectorStore(ST, DAG); - AddLegalizedOperand(Op, TF); return TF; } -SDValue VectorLegalizer::Expand(SDValue Op) { - switch (Op->getOpcode()) { +void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { + SDValue Tmp; + switch (Node->getOpcode()) { + case ISD::MERGE_VALUES: + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + Results.push_back(Node->getOperand(i)); + return; case ISD::SIGN_EXTEND_INREG: - return ExpandSEXTINREG(Op); + Results.push_back(ExpandSEXTINREG(Node)); + return; case ISD::ANY_EXTEND_VECTOR_INREG: - return ExpandANY_EXTEND_VECTOR_INREG(Op); + Results.push_back(ExpandANY_EXTEND_VECTOR_INREG(Node)); + return; case ISD::SIGN_EXTEND_VECTOR_INREG: - return ExpandSIGN_EXTEND_VECTOR_INREG(Op); + Results.push_back(ExpandSIGN_EXTEND_VECTOR_INREG(Node)); + return; case ISD::ZERO_EXTEND_VECTOR_INREG: - return ExpandZERO_EXTEND_VECTOR_INREG(Op); + Results.push_back(ExpandZERO_EXTEND_VECTOR_INREG(Node)); + return; case ISD::BSWAP: - return ExpandBSWAP(Op); + Results.push_back(ExpandBSWAP(Node)); + return; case ISD::VSELECT: - return ExpandVSELECT(Op); + Results.push_back(ExpandVSELECT(Node)); + return; case ISD::SELECT: - return ExpandSELECT(Op); + Results.push_back(ExpandSELECT(Node)); + return; case ISD::FP_TO_UINT: - return ExpandFP_TO_UINT(Op); + ExpandFP_TO_UINT(Node, Results); + return; case ISD::UINT_TO_FP: - return ExpandUINT_TO_FLOAT(Op); + ExpandUINT_TO_FLOAT(Node, Results); + return; case ISD::FNEG: - return ExpandFNEG(Op); + Results.push_back(ExpandFNEG(Node)); + return; case ISD::FSUB: - return ExpandFSUB(Op); + ExpandFSUB(Node, Results); + return; case ISD::SETCC: - return UnrollVSETCC(Op); + Results.push_back(UnrollVSETCC(Node)); + return; case ISD::ABS: - return ExpandABS(Op); + if (TLI.expandABS(Node, Tmp, DAG)) { + Results.push_back(Tmp); + return; + } + break; case ISD::BITREVERSE: - return ExpandBITREVERSE(Op); + ExpandBITREVERSE(Node, Results); + return; case ISD::CTPOP: - return ExpandCTPOP(Op); + if (TLI.expandCTPOP(Node, Tmp, DAG)) { + Results.push_back(Tmp); + return; + } + break; case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: - return ExpandCTLZ(Op); + if (TLI.expandCTLZ(Node, Tmp, DAG)) { + Results.push_back(Tmp); + return; + } + break; case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: - return ExpandCTTZ(Op); + if (TLI.expandCTTZ(Node, Tmp, DAG)) { + Results.push_back(Tmp); + return; + } + break; case ISD::FSHL: case ISD::FSHR: - return ExpandFunnelShift(Op); + if (TLI.expandFunnelShift(Node, Tmp, DAG)) { + Results.push_back(Tmp); + return; + } + break; case ISD::ROTL: case ISD::ROTR: - return ExpandROT(Op); + if (TLI.expandROT(Node, Tmp, DAG)) { + Results.push_back(Tmp); + return; + } + break; case ISD::FMINNUM: case ISD::FMAXNUM: - return ExpandFMINNUM_FMAXNUM(Op); + if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::UADDO: case ISD::USUBO: - return ExpandUADDSUBO(Op); + ExpandUADDSUBO(Node, Results); + return; case ISD::SADDO: case ISD::SSUBO: - return ExpandSADDSUBO(Op); + ExpandSADDSUBO(Node, Results); + return; case ISD::UMULO: case ISD::SMULO: - return ExpandMULO(Op); + ExpandMULO(Node, Results); + return; case ISD::USUBSAT: case ISD::SSUBSAT: case ISD::UADDSAT: case ISD::SADDSAT: - return ExpandAddSubSat(Op); + if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::SMULFIX: case ISD::UMULFIX: - return ExpandFixedPointMul(Op); + if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::SMULFIXSAT: case ISD::UMULFIXSAT: // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly // why. Maybe it results in worse codegen compared to the unroll for some // targets? This should probably be investigated. And if we still prefer to // unroll an explanation could be helpful. - return DAG.UnrollVectorOp(Op.getNode()); - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: - case ISD::STRICT_FP_TO_SINT: - case ISD::STRICT_FP_TO_UINT: - return ExpandStrictFPOp(Op); + break; + case ISD::SDIVFIX: + case ISD::UDIVFIX: + Results.push_back(ExpandFixedPointDiv(Node)); + return; +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: +#include "llvm/IR/ConstrainedOps.def" + ExpandStrictFPOp(Node, Results); + return; case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: case ISD::VECREDUCE_AND: @@ -878,22 +988,23 @@ SDValue VectorLegalizer::Expand(SDValue Op) { case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_FMAX: case ISD::VECREDUCE_FMIN: - return TLI.expandVecReduce(Op.getNode(), DAG); - default: - return DAG.UnrollVectorOp(Op.getNode()); + Results.push_back(TLI.expandVecReduce(Node, DAG)); + return; } + + Results.push_back(DAG.UnrollVectorOp(Node)); } -SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { +SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) { // Lower a select instruction where the condition is a scalar and the // operands are vectors. Lower this select to VSELECT and implement it // using XOR AND OR. The selector bit is broadcasted. - EVT VT = Op.getValueType(); - SDLoc DL(Op); + EVT VT = Node->getValueType(0); + SDLoc DL(Node); - SDValue Mask = Op.getOperand(0); - SDValue Op1 = Op.getOperand(1); - SDValue Op2 = Op.getOperand(2); + SDValue Mask = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + SDValue Op2 = Node->getOperand(2); assert(VT.isVector() && !Mask.getValueType().isVector() && Op1.getValueType() == Op2.getValueType() && "Invalid type"); @@ -907,7 +1018,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand) - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(Node); // Generate a mask operand. EVT MaskTy = VT.changeVectorElementTypeToInteger(); @@ -936,36 +1047,35 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask); Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask); SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2); - return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); + return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val); } -SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { - EVT VT = Op.getValueType(); +SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) { + EVT VT = Node->getValueType(0); // Make sure that the SRA and SHL instructions are available. if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand) - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(Node); - SDLoc DL(Op); - EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT(); + SDLoc DL(Node); + EVT OrigTy = cast<VTSDNode>(Node->getOperand(1))->getVT(); unsigned BW = VT.getScalarSizeInBits(); unsigned OrigBW = OrigTy.getScalarSizeInBits(); SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT); - Op = Op.getOperand(0); - Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz); + SDValue Op = DAG.getNode(ISD::SHL, DL, VT, Node->getOperand(0), ShiftSz); return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); } // Generically expand a vector anyext in register to a shuffle of the relevant // lanes into the appropriate locations, with other lanes left undef. -SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) { - SDLoc DL(Op); - EVT VT = Op.getValueType(); +SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) { + SDLoc DL(Node); + EVT VT = Node->getValueType(0); int NumElements = VT.getVectorNumElements(); - SDValue Src = Op.getOperand(0); + SDValue Src = Node->getOperand(0); EVT SrcVT = Src.getValueType(); int NumSrcElements = SrcVT.getVectorNumElements(); @@ -997,15 +1107,15 @@ SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) { DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask)); } -SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { - SDLoc DL(Op); - EVT VT = Op.getValueType(); - SDValue Src = Op.getOperand(0); +SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) { + SDLoc DL(Node); + EVT VT = Node->getValueType(0); + SDValue Src = Node->getOperand(0); EVT SrcVT = Src.getValueType(); // First build an any-extend node which can be legalized above when we // recurse through it. - Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src); + SDValue Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src); // Now we need sign extend. Do this by shifting the elements. Even if these // aren't legal operations, they have a better chance of being legalized @@ -1021,11 +1131,11 @@ SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { // Generically expand a vector zext in register to a shuffle of the relevant // lanes into the appropriate locations, a blend of zero into the high bits, // and a bitcast to the wider element type. -SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) { - SDLoc DL(Op); - EVT VT = Op.getValueType(); +SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) { + SDLoc DL(Node); + EVT VT = Node->getValueType(0); int NumElements = VT.getVectorNumElements(); - SDValue Src = Op.getOperand(0); + SDValue Src = Node->getOperand(0); EVT SrcVT = Src.getValueType(); int NumSrcElements = SrcVT.getVectorNumElements(); @@ -1068,8 +1178,8 @@ static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { ShuffleMask.push_back((I * ScalarSizeInBytes) + J); } -SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { - EVT VT = Op.getValueType(); +SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) { + EVT VT = Node->getValueType(0); // Generate a byte wise shuffle mask for the BSWAP. SmallVector<int, 16> ShuffleMask; @@ -1078,20 +1188,24 @@ SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { // Only emit a shuffle if the mask is legal. if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(Node); - SDLoc DL(Op); - Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); + SDLoc DL(Node); + SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0)); Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask); return DAG.getNode(ISD::BITCAST, DL, VT, Op); } -SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) { - EVT VT = Op.getValueType(); +void VectorLegalizer::ExpandBITREVERSE(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + EVT VT = Node->getValueType(0); // If we have the scalar operation, it's probably cheaper to unroll it. - if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) - return DAG.UnrollVectorOp(Op.getNode()); + if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) { + SDValue Tmp = DAG.UnrollVectorOp(Node); + Results.push_back(Tmp); + return; + } // If the vector element width is a whole number of bytes, test if its legal // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte @@ -1108,35 +1222,39 @@ SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) { TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) && TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) && TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) { - SDLoc DL(Op); - Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); + SDLoc DL(Node); + SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0)); Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), BSWAPMask); Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op); - return DAG.getNode(ISD::BITCAST, DL, VT, Op); + Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); + Results.push_back(Op); + return; } } // If we have the appropriate vector bit operations, it is better to use them // than unrolling and expanding each component. - if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) || - !TLI.isOperationLegalOrCustom(ISD::SRL, VT) || - !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) || - !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) - return DAG.UnrollVectorOp(Op.getNode()); - - // Let LegalizeDAG handle this later. - return Op; + if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) && + TLI.isOperationLegalOrCustom(ISD::SRL, VT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) + // Let LegalizeDAG handle this later. + return; + + // Otherwise unroll. + SDValue Tmp = DAG.UnrollVectorOp(Node); + Results.push_back(Tmp); } -SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { +SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. - SDLoc DL(Op); + SDLoc DL(Node); - SDValue Mask = Op.getOperand(0); - SDValue Op1 = Op.getOperand(1); - SDValue Op2 = Op.getOperand(2); + SDValue Mask = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + SDValue Op2 = Node->getOperand(2); EVT VT = Mask.getValueType(); @@ -1152,13 +1270,13 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || TLI.getBooleanContents(Op1.getValueType()) != TargetLowering::ZeroOrNegativeOneBooleanContent) - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(Node); // If the mask and the type are different sizes, unroll the vector op. This // can occur when getSetCCResultType returns something that is different in // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. if (VT.getSizeInBits() != Op1.getValueSizeInBits()) - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(Node); // Bitcast the operands to be the same type as the mask. // This is needed when we select between FP types because @@ -1173,46 +1291,61 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask); SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2); - return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); -} - -SDValue VectorLegalizer::ExpandABS(SDValue Op) { - // Attempt to expand using TargetLowering. - SDValue Result; - if (TLI.expandABS(Op.getNode(), Result, DAG)) - return Result; - - // Otherwise go ahead and unroll. - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val); } -SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) { +void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { // Attempt to expand using TargetLowering. SDValue Result, Chain; - if (TLI.expandFP_TO_UINT(Op.getNode(), Result, Chain, DAG)) { - if (Op.getNode()->isStrictFPOpcode()) - // Relink the chain - DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain); - return Result; + if (TLI.expandFP_TO_UINT(Node, Result, Chain, DAG)) { + Results.push_back(Result); + if (Node->isStrictFPOpcode()) + Results.push_back(Chain); + return; } // Otherwise go ahead and unroll. - return DAG.UnrollVectorOp(Op.getNode()); + if (Node->isStrictFPOpcode()) { + UnrollStrictFPOp(Node, Results); + return; + } + + Results.push_back(DAG.UnrollVectorOp(Node)); } -SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { - EVT VT = Op.getOperand(0).getValueType(); - SDLoc DL(Op); +void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + bool IsStrict = Node->isStrictFPOpcode(); + unsigned OpNo = IsStrict ? 1 : 0; + SDValue Src = Node->getOperand(OpNo); + EVT VT = Src.getValueType(); + SDLoc DL(Node); // Attempt to expand using TargetLowering. SDValue Result; - if (TLI.expandUINT_TO_FP(Op.getNode(), Result, DAG)) - return Result; + SDValue Chain; + if (TLI.expandUINT_TO_FP(Node, Result, Chain, DAG)) { + Results.push_back(Result); + if (IsStrict) + Results.push_back(Chain); + return; + } // Make sure that the SINT_TO_FP and SRL instructions are available. - if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || - TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) - return DAG.UnrollVectorOp(Op.getNode()); + if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) == + TargetLowering::Expand) || + (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) == + TargetLowering::Expand)) || + TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) { + if (IsStrict) { + UnrollStrictFPOp(Node, Results); + return; + } + + Results.push_back(DAG.UnrollVectorOp(Node)); + return; + } unsigned BW = VT.getScalarSizeInBits(); assert((BW == 64 || BW == 32) && @@ -1227,153 +1360,141 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); // Two to the power of half-word-size. - SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType()); + SDValue TWOHW = + DAG.getConstantFP(1ULL << (BW / 2), DL, Node->getValueType(0)); // Clear upper part of LO, lower HI - SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); - SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask); + SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord); + SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask); + + if (IsStrict) { + // Convert hi and lo to floats + // Convert the hi part back to the upper values + // TODO: Can any fast-math-flags be set on these nodes? + SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, + {Node->getValueType(0), MVT::Other}, + {Node->getOperand(0), HI}); + fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Node->getValueType(0), MVT::Other}, + {fHI.getValue(1), fHI, TWOHW}); + SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, + {Node->getValueType(0), MVT::Other}, + {Node->getOperand(0), LO}); + + SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1), + fLO.getValue(1)); + + // Add the two halves + SDValue Result = + DAG.getNode(ISD::STRICT_FADD, DL, {Node->getValueType(0), MVT::Other}, + {TF, fHI, fLO}); + + Results.push_back(Result); + Results.push_back(Result.getValue(1)); + return; + } // Convert hi and lo to floats // Convert the hi part back to the upper values // TODO: Can any fast-math-flags be set on these nodes? - SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI); - fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW); - SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO); + SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), HI); + fHI = DAG.getNode(ISD::FMUL, DL, Node->getValueType(0), fHI, TWOHW); + SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), LO); // Add the two halves - return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO); + Results.push_back( + DAG.getNode(ISD::FADD, DL, Node->getValueType(0), fHI, fLO)); } -SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { - if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { - SDLoc DL(Op); - SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType()); +SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) { + if (TLI.isOperationLegalOrCustom(ISD::FSUB, Node->getValueType(0))) { + SDLoc DL(Node); + SDValue Zero = DAG.getConstantFP(-0.0, DL, Node->getValueType(0)); // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB. - return DAG.getNode(ISD::FSUB, DL, Op.getValueType(), - Zero, Op.getOperand(0)); + return DAG.getNode(ISD::FSUB, DL, Node->getValueType(0), Zero, + Node->getOperand(0)); } - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(Node); } -SDValue VectorLegalizer::ExpandFSUB(SDValue Op) { +void VectorLegalizer::ExpandFSUB(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal, // we can defer this to operation legalization where it will be lowered as // a+(-b). - EVT VT = Op.getValueType(); + EVT VT = Node->getValueType(0); if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) && TLI.isOperationLegalOrCustom(ISD::FADD, VT)) - return Op; // Defer to LegalizeDAG - - return DAG.UnrollVectorOp(Op.getNode()); -} + return; // Defer to LegalizeDAG -SDValue VectorLegalizer::ExpandCTPOP(SDValue Op) { - SDValue Result; - if (TLI.expandCTPOP(Op.getNode(), Result, DAG)) - return Result; - - return DAG.UnrollVectorOp(Op.getNode()); -} - -SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) { - SDValue Result; - if (TLI.expandCTLZ(Op.getNode(), Result, DAG)) - return Result; - - return DAG.UnrollVectorOp(Op.getNode()); + SDValue Tmp = DAG.UnrollVectorOp(Node); + Results.push_back(Tmp); } -SDValue VectorLegalizer::ExpandCTTZ(SDValue Op) { - SDValue Result; - if (TLI.expandCTTZ(Op.getNode(), Result, DAG)) - return Result; - - return DAG.UnrollVectorOp(Op.getNode()); -} - -SDValue VectorLegalizer::ExpandFunnelShift(SDValue Op) { - SDValue Result; - if (TLI.expandFunnelShift(Op.getNode(), Result, DAG)) - return Result; - - return DAG.UnrollVectorOp(Op.getNode()); -} - -SDValue VectorLegalizer::ExpandROT(SDValue Op) { - SDValue Result; - if (TLI.expandROT(Op.getNode(), Result, DAG)) - return Result; - - return DAG.UnrollVectorOp(Op.getNode()); -} - -SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) { - if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Op.getNode(), DAG)) - return Expanded; - return DAG.UnrollVectorOp(Op.getNode()); -} - -SDValue VectorLegalizer::ExpandUADDSUBO(SDValue Op) { +void VectorLegalizer::ExpandUADDSUBO(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { SDValue Result, Overflow; - TLI.expandUADDSUBO(Op.getNode(), Result, Overflow, DAG); - - if (Op.getResNo() == 0) { - AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow)); - return Result; - } else { - AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result)); - return Overflow; - } + TLI.expandUADDSUBO(Node, Result, Overflow, DAG); + Results.push_back(Result); + Results.push_back(Overflow); } -SDValue VectorLegalizer::ExpandSADDSUBO(SDValue Op) { +void VectorLegalizer::ExpandSADDSUBO(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { SDValue Result, Overflow; - TLI.expandSADDSUBO(Op.getNode(), Result, Overflow, DAG); - - if (Op.getResNo() == 0) { - AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow)); - return Result; - } else { - AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result)); - return Overflow; - } + TLI.expandSADDSUBO(Node, Result, Overflow, DAG); + Results.push_back(Result); + Results.push_back(Overflow); } -SDValue VectorLegalizer::ExpandMULO(SDValue Op) { +void VectorLegalizer::ExpandMULO(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { SDValue Result, Overflow; - if (!TLI.expandMULO(Op.getNode(), Result, Overflow, DAG)) - std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Op.getNode()); + if (!TLI.expandMULO(Node, Result, Overflow, DAG)) + std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Node); - if (Op.getResNo() == 0) { - AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow)); - return Result; - } else { - AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result)); - return Overflow; - } + Results.push_back(Result); + Results.push_back(Overflow); } -SDValue VectorLegalizer::ExpandAddSubSat(SDValue Op) { - if (SDValue Expanded = TLI.expandAddSubSat(Op.getNode(), DAG)) +SDValue VectorLegalizer::ExpandFixedPointDiv(SDNode *Node) { + SDNode *N = Node; + if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N), + N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG)) return Expanded; - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(N); } -SDValue VectorLegalizer::ExpandFixedPointMul(SDValue Op) { - if (SDValue Expanded = TLI.expandFixedPointMul(Op.getNode(), DAG)) - return Expanded; - return DAG.UnrollVectorOp(Op.getNode()); +void VectorLegalizer::ExpandStrictFPOp(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) { + ExpandUINT_TO_FLOAT(Node, Results); + return; + } + if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) { + ExpandFP_TO_UINT(Node, Results); + return; + } + + UnrollStrictFPOp(Node, Results); } -SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) { - EVT VT = Op.getValueType(); +void VectorLegalizer::UnrollStrictFPOp(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + EVT VT = Node->getValueType(0); EVT EltVT = VT.getVectorElementType(); unsigned NumElems = VT.getVectorNumElements(); - unsigned NumOpers = Op.getNumOperands(); + unsigned NumOpers = Node->getNumOperands(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT ValueVTs[] = {EltVT, MVT::Other}; - SDValue Chain = Op.getOperand(0); - SDLoc dl(Op); + + EVT TmpEltVT = EltVT; + if (Node->getOpcode() == ISD::STRICT_FSETCC || + Node->getOpcode() == ISD::STRICT_FSETCCS) + TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), TmpEltVT); + + EVT ValueVTs[] = {TmpEltVT, MVT::Other}; + SDValue Chain = Node->getOperand(0); + SDLoc dl(Node); SmallVector<SDValue, 32> OpValues; SmallVector<SDValue, 32> OpChains; @@ -1387,7 +1508,7 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) { // Now process the remaining operands. for (unsigned j = 1; j < NumOpers; ++j) { - SDValue Oper = Op.getOperand(j); + SDValue Oper = Node->getOperand(j); EVT OperVT = Oper.getValueType(); if (OperVT.isVector()) @@ -1397,28 +1518,37 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) { Opers.push_back(Oper); } - SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers); + SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers); + SDValue ScalarResult = ScalarOp.getValue(0); + SDValue ScalarChain = ScalarOp.getValue(1); + + if (Node->getOpcode() == ISD::STRICT_FSETCC || + Node->getOpcode() == ISD::STRICT_FSETCCS) + ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult, + DAG.getConstant(APInt::getAllOnesValue + (EltVT.getSizeInBits()), dl, EltVT), + DAG.getConstant(0, dl, EltVT)); - OpValues.push_back(ScalarOp.getValue(0)); - OpChains.push_back(ScalarOp.getValue(1)); + OpValues.push_back(ScalarResult); + OpChains.push_back(ScalarChain); } SDValue Result = DAG.getBuildVector(VT, dl, OpValues); SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains); - AddLegalizedOperand(Op.getValue(0), Result); - AddLegalizedOperand(Op.getValue(1), NewChain); - - return Op.getResNo() ? NewChain : Result; + Results.push_back(Result); + Results.push_back(NewChain); } -SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { - EVT VT = Op.getValueType(); +SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) { + EVT VT = Node->getValueType(0); unsigned NumElems = VT.getVectorNumElements(); EVT EltVT = VT.getVectorElementType(); - SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2); + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue CC = Node->getOperand(2); EVT TmpEltVT = LHS.getValueType().getVectorElementType(); - SDLoc dl(Op); + SDLoc dl(Node); SmallVector<SDValue, 8> Ops(NumElems); for (unsigned i = 0; i < NumElems; ++i) { SDValue LHSElem = DAG.getNode( diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 3763e886cef2..974914d00d05 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TypeSize.h" using namespace llvm; #define DEBUG_TYPE "legalize-types" @@ -50,7 +51,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break; case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; - case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break; case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; @@ -146,35 +146,13 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FMA: R = ScalarizeVecRes_TernaryOp(N); break; - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: - case ISD::STRICT_FP_TO_SINT: - case ISD::STRICT_FP_TO_UINT: - case ISD::STRICT_FP_EXTEND: + +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: +#include "llvm/IR/ConstrainedOps.def" R = ScalarizeVecRes_StrictFPOp(N); break; + case ISD::UADDO: case ISD::SADDO: case ISD::USUBO: @@ -187,7 +165,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIXSAT: case ISD::UMULFIX: case ISD::UMULFIXSAT: - R = ScalarizeVecRes_MULFIX(N); + case ISD::SDIVFIX: + case ISD::UDIVFIX: + R = ScalarizeVecRes_FIX(N); break; } @@ -211,7 +191,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { Op0.getValueType(), Op0, Op1, Op2); } -SDValue DAGTypeLegalizer::ScalarizeVecRes_MULFIX(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) { SDValue Op0 = GetScalarizedVector(N->getOperand(0)); SDValue Op1 = GetScalarizedVector(N->getOperand(1)); SDValue Op2 = N->getOperand(2); @@ -226,10 +206,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) { EVT ValueVTs[] = {VT, MVT::Other}; SDLoc dl(N); - SmallVector<SDValue, 4> Opers; + SmallVector<SDValue, 4> Opers(NumOpers); // The Chain is the first operand. - Opers.push_back(Chain); + Opers[0] = Chain; // Now process the remaining operands. for (unsigned i = 1; i < NumOpers; ++i) { @@ -238,7 +218,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) { if (Oper.getValueType().isVector()) Oper = GetScalarizedVector(Oper); - Opers.push_back(Oper); + Opers[i] = Oper; } SDValue Result = DAG.getNode(N->getOpcode(), dl, ValueVTs, Opers); @@ -326,18 +306,6 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) { NewVT, Op, N->getOperand(1)); } -SDValue DAGTypeLegalizer::ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N) { - EVT NewVT = N->getValueType(0).getVectorElementType(); - SDValue Op = GetScalarizedVector(N->getOperand(1)); - SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N), - { NewVT, MVT::Other }, - { N->getOperand(0), Op, N->getOperand(2) }); - // Legalize the chain result - switch anything that used the old chain to - // use the new one. - ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); - return Res; -} - SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { SDValue Op = GetScalarizedVector(N->getOperand(0)); return DAG.getNode(ISD::FPOWI, SDLoc(N), @@ -606,6 +574,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::UINT_TO_FP: Res = ScalarizeVecOp_UnaryOp(N); break; + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: Res = ScalarizeVecOp_UnaryOp_StrictFP(N); @@ -699,7 +669,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) { ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); // Revectorize the result so the types line up with what the uses of this // expression expect. - return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); + Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); + + // Do our own replacement and return SDValue() to tell the caller that we + // handled all replacements since caller can only handle a single result. + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); } /// The vectors to concatenate have length one - use a BUILD_VECTOR instead. @@ -804,7 +779,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); - return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); + + Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); + + // Do our own replacement and return SDValue() to tell the caller that we + // handled all replacements since caller can only handle a single result. + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); } SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) { @@ -901,13 +882,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FP_EXTEND: - case ISD::STRICT_FP_EXTEND: case ISD::FP_ROUND: - case ISD::STRICT_FP_ROUND: case ISD::FP_TO_SINT: - case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: - case ISD::STRICT_FP_TO_UINT: case ISD::FRINT: case ISD::FROUND: case ISD::FSIN: @@ -964,32 +941,13 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FMA: SplitVecRes_TernaryOp(N, Lo, Hi); break; - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: + +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: +#include "llvm/IR/ConstrainedOps.def" SplitVecRes_StrictFPOp(N, Lo, Hi); break; + case ISD::UADDO: case ISD::SADDO: case ISD::USUBO: @@ -1002,7 +960,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIXSAT: case ISD::UMULFIX: case ISD::UMULFIXSAT: - SplitVecRes_MULFIX(N, Lo, Hi); + case ISD::SDIVFIX: + case ISD::UDIVFIX: + SplitVecRes_FIX(N, Lo, Hi); break; } @@ -1041,7 +1001,7 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, Op0Hi, Op1Hi, Op2Hi); } -void DAGTypeLegalizer::SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi) { +void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; GetSplitVector(N->getOperand(0), LHSLo, LHSHi); SDValue RHSLo, RHSHi; @@ -1206,9 +1166,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueSizeInBits() / 8; - StackPtr = - DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getConstant(IncrementSize, dl, StackPtr.getValueType())); + StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl); // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), @@ -1304,12 +1262,12 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDLoc dl(N); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - SmallVector<SDValue, 4> OpsLo; - SmallVector<SDValue, 4> OpsHi; + SmallVector<SDValue, 4> OpsLo(NumOps); + SmallVector<SDValue, 4> OpsHi(NumOps); // The Chain is the first operand. - OpsLo.push_back(Chain); - OpsHi.push_back(Chain); + OpsLo[0] = Chain; + OpsHi[0] = Chain; // Now process the remaining operands. for (unsigned i = 1; i < NumOps; ++i) { @@ -1327,8 +1285,8 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, std::tie(OpLo, OpHi) = DAG.SplitVectorOperand(N, i); } - OpsLo.push_back(OpLo); - OpsHi.push_back(OpHi); + OpsLo[i] = OpLo; + OpsHi[i] = OpHi; } EVT LoValueVTs[] = {LoVT, MVT::Other}; @@ -1572,12 +1530,15 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi) { + assert(MLD->isUnindexed() && "Indexed masked load during type legalization!"); EVT LoVT, HiVT; SDLoc dl(MLD); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); SDValue Ch = MLD->getChain(); SDValue Ptr = MLD->getBasePtr(); + SDValue Offset = MLD->getOffset(); + assert(Offset.isUndef() && "Unexpected indexed masked load offset"); SDValue Mask = MLD->getMask(); SDValue PassThru = MLD->getPassThru(); unsigned Alignment = MLD->getOriginalAlignment(); @@ -1609,8 +1570,9 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); - Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, PassThruLo, LoMemVT, MMO, - ExtType, MLD->isExpandingLoad()); + Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT, + MMO, MLD->getAddressingMode(), ExtType, + MLD->isExpandingLoad()); Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, MLD->isExpandingLoad()); @@ -1621,8 +1583,9 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, HiMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); - Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, PassThruHi, HiMemVT, MMO, - ExtType, MLD->isExpandingLoad()); + Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, HiMemVT, + MMO, MLD->getAddressingMode(), ExtType, + MLD->isExpandingLoad()); // Build a factor node to remember that this load is independent of the // other one. @@ -1747,24 +1710,6 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, if (N->getOpcode() == ISD::FP_ROUND) { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); - } else if (N->getOpcode() == ISD::STRICT_FP_ROUND) { - Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other }, - { N->getOperand(0), Lo, N->getOperand(2) }); - Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other }, - { N->getOperand(0), Hi, N->getOperand(2) }); - SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Lo.getValue(1), Hi.getValue(1)); - ReplaceValueWith(SDValue(N, 1), NewChain); - } else if (N->isStrictFPOpcode()) { - Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other }, - { N->getOperand(0), Lo }); - Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other }, - { N->getOperand(0), Hi }); - // Legalize the chain result - switch anything that used the old chain to - // use the new one. - SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Lo.getValue(1), Hi.getValue(1)); - ReplaceValueWith(SDValue(N, 1), NewChain); } else { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); @@ -2003,9 +1948,12 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::VSELECT: Res = SplitVecOp_VSELECT(N, OpNo); break; + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: - if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType())) + if (N->getValueType(0).bitsLT( + N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType())) Res = SplitVecOp_TruncateHelper(N); else Res = SplitVecOp_UnaryOp(N); @@ -2357,8 +2305,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo) { + assert(N->isUnindexed() && "Indexed masked store of vector?"); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); + SDValue Offset = N->getOffset(); + assert(Offset.isUndef() && "Unexpected indexed masked store offset"); SDValue Mask = N->getMask(); SDValue Data = N->getValue(); EVT MemoryVT = N->getMemoryVT(); @@ -2392,8 +2343,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, - N->isTruncatingStore(), + Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO, + N->getAddressingMode(), N->isTruncatingStore(), N->isCompressingStore()); Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, @@ -2405,8 +2356,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, - N->isTruncatingStore(), N->isCompressingStore()); + Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO, + N->getAddressingMode(), N->isTruncatingStore(), + N->isCompressingStore()); // Build a factor node to remember that this store is independent of the // other one. @@ -2562,7 +2514,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { // // Without this transform, the original truncate would end up being // scalarized, which is pretty much always a last resort. - SDValue InVec = N->getOperand(0); + unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0; + SDValue InVec = N->getOperand(OpNo); EVT InVT = InVec->getValueType(0); EVT OutVT = N->getValueType(0); unsigned NumElements = OutVT.getVectorNumElements(); @@ -2606,8 +2559,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements/2); - SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec); - SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec); + + SDValue HalfLo; + SDValue HalfHi; + SDValue Chain; + if (N->isStrictFPOpcode()) { + HalfLo = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other}, + {N->getOperand(0), HalfLo}); + HalfHi = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other}, + {N->getOperand(0), HalfHi}); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, HalfLo.getValue(1), + HalfHi.getValue(1)); + } else { + HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec); + HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec); + } // Concatenate them to get the full intermediate truncation result. EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements); SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo, @@ -2616,6 +2584,17 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { // type. This should normally be something that ends up being legal directly, // but in theory if a target has very wide vectors and an annoyingly // restricted set of legal types, this split can chain to build things up. + + if (N->isStrictFPOpcode()) { + SDValue Res = DAG.getNode( + ISD::STRICT_FP_ROUND, DL, {OutVT, MVT::Other}, + {Chain, InterVec, + DAG.getTargetConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()))}); + // Relink the chain + ReplaceValueWith(SDValue(N, 1), SDValue(Res.getNode(), 1)); + return Res; + } + return IsFloat ? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec, DAG.getTargetConstant( @@ -2774,30 +2753,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_BinaryWithExtraScalarOp(N); break; - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: +#include "llvm/IR/ConstrainedOps.def" Res = WidenVecRes_StrictFP(N); break; @@ -2843,13 +2801,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Convert(N); break; - case ISD::STRICT_FP_EXTEND: - case ISD::STRICT_FP_ROUND: - case ISD::STRICT_FP_TO_SINT: - case ISD::STRICT_FP_TO_UINT: - Res = WidenVecRes_Convert_StrictFP(N); - break; - case ISD::FABS: case ISD::FCEIL: case ISD::FCOS: @@ -3091,6 +3042,21 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) { + switch (N->getOpcode()) { + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: + return WidenVecRes_STRICT_FSETCC(N); + case ISD::STRICT_FP_EXTEND: + case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: + return WidenVecRes_Convert_StrictFP(N); + default: + break; + } + // StrictFP op widening for operations that can trap. unsigned NumOpers = N->getNumOperands(); unsigned Opcode = N->getOpcode(); @@ -3497,7 +3463,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { switch (getTypeAction(InVT)) { case TargetLowering::TypeLegal: break; - case TargetLowering::TypePromoteInteger: + case TargetLowering::TypePromoteInteger: { // If the incoming type is a vector that is being promoted, then // we know that the elements are arranged differently and that we // must perform the conversion using a stack slot. @@ -3506,11 +3472,24 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { // If the InOp is promoted to the same size, convert it. Otherwise, // fall out of the switch and widen the promoted input. - InOp = GetPromotedInteger(InOp); - InVT = InOp.getValueType(); - if (WidenVT.bitsEq(InVT)) - return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); + SDValue NInOp = GetPromotedInteger(InOp); + EVT NInVT = NInOp.getValueType(); + if (WidenVT.bitsEq(NInVT)) { + // For big endian targets we need to shift the input integer or the + // interesting bits will end up at the wrong place. + if (DAG.getDataLayout().isBigEndian()) { + unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits(); + EVT ShiftAmtTy = TLI.getShiftAmountTy(NInVT, DAG.getDataLayout()); + assert(ShiftAmt < WidenVT.getSizeInBits() && "Too large shift amount!"); + NInOp = DAG.getNode(ISD::SHL, dl, NInVT, NInOp, + DAG.getConstant(ShiftAmt, dl, ShiftAmtTy)); + } + return DAG.getNode(ISD::BITCAST, dl, WidenVT, NInOp); + } + InOp = NInOp; + InVT = NInVT; break; + } case TargetLowering::TypeSoftenFloat: case TargetLowering::TypePromoteFloat: case TargetLowering::TypeExpandInteger: @@ -3748,10 +3727,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { WidenVT.getVectorNumElements()); Mask = ModifyToType(Mask, WideMaskVT, true); - SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), - Mask, PassThru, N->getMemoryVT(), - N->getMemOperand(), ExtType, - N->isExpandingLoad()); + SDValue Res = DAG.getMaskedLoad( + WidenVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, + PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(), + ExtType, N->isExpandingLoad()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -3798,6 +3777,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { WidenVT, N->getOperand(0)); } +// Return true is this is a SETCC node or a strict version of it. +static inline bool isSETCCOp(unsigned Opcode) { + switch (Opcode) { + case ISD::SETCC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: + return true; + } + return false; +} + // Return true if this is a node that could have two SETCCs as operands. static inline bool isLogicalMaskOp(unsigned Opcode) { switch (Opcode) { @@ -3809,6 +3799,13 @@ static inline bool isLogicalMaskOp(unsigned Opcode) { return false; } +// If N is a SETCC or a strict variant of it, return the type +// of the compare operands. +static inline EVT getSETCCOperandType(SDValue N) { + unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0; + return N->getOperand(OpNo).getValueType(); +} + // This is used just for the assert in convertMask(). Check that this either // a SETCC or a previously handled SETCC by convertMask(). #ifndef NDEBUG @@ -3831,7 +3828,7 @@ static inline bool isSETCCorConvertedSETCC(SDValue N) { return isSETCCorConvertedSETCC(N.getOperand(0)) && isSETCCorConvertedSETCC(N.getOperand(1)); - return (N.getOpcode() == ISD::SETCC || + return (isSETCCOp(N.getOpcode()) || ISD::isBuildVectorOfConstantSDNodes(N.getNode())); } #endif @@ -3846,10 +3843,17 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT, assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument."); // Make a new Mask node, with a legal result VT. + SDValue Mask; SmallVector<SDValue, 4> Ops; for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i) Ops.push_back(InMask->getOperand(i)); - SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops); + if (InMask->isStrictFPOpcode()) { + Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), + { MaskVT, MVT::Other }, Ops); + ReplaceValueWith(InMask.getValue(1), Mask.getValue(1)); + } + else + Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops); // If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign // extend or truncate is needed. @@ -3902,7 +3906,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { if (N->getOpcode() != ISD::VSELECT) return SDValue(); - if (Cond->getOpcode() != ISD::SETCC && !isLogicalMaskOp(Cond->getOpcode())) + if (!isSETCCOp(Cond->getOpcode()) && !isLogicalMaskOp(Cond->getOpcode())) return SDValue(); // If this is a splitted VSELECT that was previously already handled, do @@ -3925,8 +3929,8 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { return SDValue(); // If there is support for an i1 vector mask, don't touch. - if (Cond.getOpcode() == ISD::SETCC) { - EVT SetCCOpVT = Cond->getOperand(0).getValueType(); + if (isSETCCOp(Cond.getOpcode())) { + EVT SetCCOpVT = getSETCCOperandType(Cond); while (TLI.getTypeAction(Ctx, SetCCOpVT) != TargetLowering::TypeLegal) SetCCOpVT = TLI.getTypeToTransformTo(Ctx, SetCCOpVT); EVT SetCCResVT = getSetCCResultType(SetCCOpVT); @@ -3957,17 +3961,17 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { ToMaskVT = ToMaskVT.changeVectorElementTypeToInteger(); SDValue Mask; - if (Cond->getOpcode() == ISD::SETCC) { - EVT MaskVT = getSetCCResultType(Cond.getOperand(0).getValueType()); + if (isSETCCOp(Cond->getOpcode())) { + EVT MaskVT = getSetCCResultType(getSETCCOperandType(Cond)); Mask = convertMask(Cond, MaskVT, ToMaskVT); } else if (isLogicalMaskOp(Cond->getOpcode()) && - Cond->getOperand(0).getOpcode() == ISD::SETCC && - Cond->getOperand(1).getOpcode() == ISD::SETCC) { + isSETCCOp(Cond->getOperand(0).getOpcode()) && + isSETCCOp(Cond->getOperand(1).getOpcode())) { // Cond is (AND/OR/XOR (SETCC, SETCC)) SDValue SETCC0 = Cond->getOperand(0); SDValue SETCC1 = Cond->getOperand(1); - EVT VT0 = getSetCCResultType(SETCC0.getOperand(0).getValueType()); - EVT VT1 = getSetCCResultType(SETCC1.getOperand(0).getValueType()); + EVT VT0 = getSetCCResultType(getSETCCOperandType(SETCC0)); + EVT VT1 = getSetCCResultType(getSETCCOperandType(SETCC1)); unsigned ScalarBits0 = VT0.getScalarSizeInBits(); unsigned ScalarBits1 = VT1.getScalarSizeInBits(); unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits(); @@ -4119,6 +4123,47 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { WidenVT, InOp1, InOp2, N->getOperand(2)); } +SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) { + assert(N->getValueType(0).isVector() && + N->getOperand(1).getValueType().isVector() && + "Operands must be vectors"); + EVT VT = N->getValueType(0); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + unsigned NumElts = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + + SDLoc dl(N); + SDValue Chain = N->getOperand(0); + SDValue LHS = N->getOperand(1); + SDValue RHS = N->getOperand(2); + SDValue CC = N->getOperand(3); + EVT TmpEltVT = LHS.getValueType().getVectorElementType(); + + // Fully unroll and reassemble. + SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getUNDEF(EltVT)); + SmallVector<SDValue, 8> Chains(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + SDValue LHSElem = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue RHSElem = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other}, + {Chain, LHSElem, RHSElem, CC}); + Chains[i] = Scalars[i].getValue(1); + Scalars[i] = DAG.getSelect(dl, EltVT, Scalars[i], + DAG.getBoolConstant(true, dl, EltVT, VT), + DAG.getBoolConstant(false, dl, EltVT, VT)); + } + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); + ReplaceValueWith(SDValue(N, 1), NewChain); + + return DAG.getBuildVector(WidenVT, dl, Scalars); +} //===----------------------------------------------------------------------===// // Widen Vector Operand @@ -4150,6 +4195,8 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break; case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break; case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break; case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break; @@ -4161,12 +4208,16 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::FP_EXTEND: case ISD::STRICT_FP_EXTEND: + case ISD::FP_ROUND: + case ISD::STRICT_FP_ROUND: case ISD::FP_TO_SINT: case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::STRICT_FP_TO_UINT: case ISD::SINT_TO_FP: + case ISD::STRICT_SINT_TO_FP: case ISD::UINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: case ISD::TRUNCATE: Res = WidenVecOp_Convert(N); break; @@ -4297,13 +4348,21 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) { SDValue Res; if (N->isStrictFPOpcode()) { - Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other }, - { N->getOperand(0), InOp }); + if (Opcode == ISD::STRICT_FP_ROUND) + Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other }, + { N->getOperand(0), InOp, N->getOperand(2) }); + else + Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other }, + { N->getOperand(0), InOp }); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); - } else - Res = DAG.getNode(Opcode, dl, WideVT, InOp); + } else { + if (Opcode == ISD::FP_ROUND) + Res = DAG.getNode(Opcode, dl, WideVT, InOp, N->getOperand(1)); + else + Res = DAG.getNode(Opcode, dl, WideVT, InOp); + } return DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, VT, Res, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); @@ -4486,7 +4545,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { StVal.getValueType().getVectorNumElements() && "Mask and data vectors should have the same number of elements"); return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(), - Mask, MST->getMemoryVT(), MST->getMemOperand(), + MST->getOffset(), Mask, MST->getMemoryVT(), + MST->getMemOperand(), MST->getAddressingMode(), false, MST->isCompressingStore()); } @@ -4580,6 +4640,44 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { return DAG.getNode(ExtendCode, dl, VT, CC); } +SDValue DAGTypeLegalizer::WidenVecOp_STRICT_FSETCC(SDNode *N) { + SDValue Chain = N->getOperand(0); + SDValue LHS = GetWidenedVector(N->getOperand(1)); + SDValue RHS = GetWidenedVector(N->getOperand(2)); + SDValue CC = N->getOperand(3); + SDLoc dl(N); + + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + EVT TmpEltVT = LHS.getValueType().getVectorElementType(); + unsigned NumElts = VT.getVectorNumElements(); + + // Unroll into a build vector. + SmallVector<SDValue, 8> Scalars(NumElts); + SmallVector<SDValue, 8> Chains(NumElts); + + for (unsigned i = 0; i != NumElts; ++i) { + SDValue LHSElem = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue RHSElem = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other}, + {Chain, LHSElem, RHSElem, CC}); + Chains[i] = Scalars[i].getValue(1); + Scalars[i] = DAG.getSelect(dl, EltVT, Scalars[i], + DAG.getBoolConstant(true, dl, EltVT, VT), + DAG.getBoolConstant(false, dl, EltVT, VT)); + } + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); + ReplaceValueWith(SDValue(N, 1), NewChain); + + return DAG.getBuildVector(VT, dl, Scalars); +} + SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { SDLoc dl(N); SDValue Op = GetWidenedVector(N->getOperand(0)); @@ -4670,7 +4768,8 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, unsigned Width, EVT WidenVT, unsigned Align = 0, unsigned WidenEx = 0) { EVT WidenEltVT = WidenVT.getVectorElementType(); - unsigned WidenWidth = WidenVT.getSizeInBits(); + const bool Scalable = WidenVT.isScalableVector(); + unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinSize(); unsigned WidenEltWidth = WidenEltVT.getSizeInBits(); unsigned AlignInBits = Align*8; @@ -4681,23 +4780,27 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, // See if there is larger legal integer than the element type to load/store. unsigned VT; - for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE; - VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { - EVT MemVT((MVT::SimpleValueType) VT); - unsigned MemVTWidth = MemVT.getSizeInBits(); - if (MemVT.getSizeInBits() <= WidenEltWidth) - break; - auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT); - if ((Action == TargetLowering::TypeLegal || - Action == TargetLowering::TypePromoteInteger) && - (WidenWidth % MemVTWidth) == 0 && - isPowerOf2_32(WidenWidth / MemVTWidth) && - (MemVTWidth <= Width || - (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { - if (MemVTWidth == WidenWidth) - return MemVT; - RetVT = MemVT; - break; + // Don't bother looking for an integer type if the vector is scalable, skip + // to vector types. + if (!Scalable) { + for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE; + VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { + EVT MemVT((MVT::SimpleValueType) VT); + unsigned MemVTWidth = MemVT.getSizeInBits(); + if (MemVT.getSizeInBits() <= WidenEltWidth) + break; + auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT); + if ((Action == TargetLowering::TypeLegal || + Action == TargetLowering::TypePromoteInteger) && + (WidenWidth % MemVTWidth) == 0 && + isPowerOf2_32(WidenWidth / MemVTWidth) && + (MemVTWidth <= Width || + (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { + if (MemVTWidth == WidenWidth) + return MemVT; + RetVT = MemVT; + break; + } } } @@ -4706,7 +4809,10 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) { EVT MemVT = (MVT::SimpleValueType) VT; - unsigned MemVTWidth = MemVT.getSizeInBits(); + // Skip vector MVTs which don't match the scalable property of WidenVT. + if (Scalable != MemVT.isScalableVector()) + continue; + unsigned MemVTWidth = MemVT.getSizeInBits().getKnownMinSize(); auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT); if ((Action == TargetLowering::TypeLegal || Action == TargetLowering::TypePromoteInteger) && diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index d4c1fb36475e..0e4d783e3505 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -910,10 +910,9 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { if (HasDbg) ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); - if (MDNode *MD = DAG->getHeapAllocSite(N)) { + if (MDNode *MD = DAG->getHeapAllocSite(N)) if (NewInsn && NewInsn->isCall()) - MF.addCodeViewHeapAllocSite(NewInsn, MD); - } + NewInsn->setHeapAllocMarker(MF, MD); GluedNodes.pop_back(); } @@ -923,9 +922,10 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { if (HasDbg) ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); + if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) { if (NewInsn && NewInsn->isCall()) - MF.addCodeViewHeapAllocSite(NewInsn, MD); + NewInsn->setHeapAllocMarker(MF, MD); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 52a71b91d93f..313e07b5fdd6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -24,6 +24,9 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -63,6 +66,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Utils/SizeOpts.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -352,9 +356,9 @@ ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { (OldG << 2)); // New L bit. } -ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { +static ISD::CondCode getSetCCInverseImpl(ISD::CondCode Op, bool isIntegerLike) { unsigned Operation = Op; - if (isInteger) + if (isIntegerLike) Operation ^= 7; // Flip L, G, E bits, but not U. else Operation ^= 15; // Flip all of the condition bits. @@ -365,6 +369,15 @@ ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { return ISD::CondCode(Operation); } +ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, EVT Type) { + return getSetCCInverseImpl(Op, Type.isInteger()); +} + +ISD::CondCode ISD::GlobalISel::getSetCCInverse(ISD::CondCode Op, + bool isIntegerLike) { + return getSetCCInverseImpl(Op, isIntegerLike); +} + /// For an integer comparison, return 1 if the comparison is a signed operation /// and 2 if the result is an unsigned comparison. Return zero if the operation /// does not depend on the sign of the input (setne and seteq). @@ -385,7 +398,8 @@ static int isSignedOp(ISD::CondCode Opcode) { } ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2, - bool IsInteger) { + EVT Type) { + bool IsInteger = Type.isInteger(); if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) // Cannot fold a signed integer setcc with an unsigned integer setcc. return ISD::SETCC_INVALID; @@ -405,7 +419,8 @@ ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2, } ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, - bool IsInteger) { + EVT Type) { + bool IsInteger = Type.isInteger(); if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) // Cannot fold a signed setcc with an unsigned setcc. return ISD::SETCC_INVALID; @@ -1005,7 +1020,9 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) void SelectionDAG::init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, - LegacyDivergenceAnalysis * Divergence) { + LegacyDivergenceAnalysis * Divergence, + ProfileSummaryInfo *PSIin, + BlockFrequencyInfo *BFIin) { MF = &NewMF; SDAGISelPass = PassPtr; ORE = &NewORE; @@ -1014,6 +1031,8 @@ void SelectionDAG::init(MachineFunction &NewMF, LibInfo = LibraryInfo; Context = &MF->getFunction().getContext(); DA = Divergence; + PSI = PSIin; + BFI = BFIin; } SelectionDAG::~SelectionDAG() { @@ -1023,6 +1042,11 @@ SelectionDAG::~SelectionDAG() { delete DbgInfo; } +bool SelectionDAG::shouldOptForSize() const { + return MF->getFunction().hasOptSize() || + llvm::shouldOptimizeForSize(FLI->MBB->getBasicBlock(), PSI, BFI); +} + void SelectionDAG::allnodes_clear() { assert(&*AllNodes.begin() == &EntryNode); AllNodes.remove(AllNodes.begin()); @@ -1101,6 +1125,20 @@ SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) { : getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL)); } +std::pair<SDValue, SDValue> +SelectionDAG::getStrictFPExtendOrRound(SDValue Op, SDValue Chain, + const SDLoc &DL, EVT VT) { + assert(!VT.bitsEq(Op.getValueType()) && + "Strict no-op FP extend/round not allowed."); + SDValue Res = + VT.bitsGT(Op.getValueType()) + ? getNode(ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other}, {Chain, Op}) + : getNode(ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other}, + {Chain, Op, getIntPtrConstant(0, DL)}); + + return std::pair<SDValue, SDValue>(Res, SDValue(Res.getNode(), 1)); +} + SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::ANY_EXTEND, DL, VT, Op) : @@ -1279,7 +1317,9 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, } SDValue Result(N, 0); - if (VT.isVector()) + if (VT.isScalableVector()) + Result = getSplatVector(VT, DL, Result); + else if (VT.isVector()) Result = getSplatBuildVector(VT, DL, Result); return Result; @@ -1425,7 +1465,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = MF->getFunction().hasOptSize() + Alignment = shouldOptForSize() ? getDataLayout().getABITypeAlignment(C->getType()) : getDataLayout().getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; @@ -2379,9 +2419,10 @@ SDValue SelectionDAG::getSplatValue(SDValue V) { /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that /// is less than the element bit-width of the shift node, return it. -static const APInt *getValidShiftAmountConstant(SDValue V) { +static const APInt *getValidShiftAmountConstant(SDValue V, + const APInt &DemandedElts) { unsigned BitWidth = V.getScalarValueSizeInBits(); - if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) { + if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1), DemandedElts)) { // Shifting more than the bitwidth is not valid. const APInt &ShAmt = SA->getAPIntValue(); if (ShAmt.ult(BitWidth)) @@ -2392,13 +2433,16 @@ static const APInt *getValidShiftAmountConstant(SDValue V) { /// If a SHL/SRA/SRL node has constant vector shift amounts that are all less /// than the element bit-width of the shift node, return the minimum value. -static const APInt *getValidMinimumShiftAmountConstant(SDValue V) { +static const APInt * +getValidMinimumShiftAmountConstant(SDValue V, const APInt &DemandedElts) { unsigned BitWidth = V.getScalarValueSizeInBits(); auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1)); if (!BV) return nullptr; const APInt *MinShAmt = nullptr; for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + if (!DemandedElts[i]) + continue; auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i)); if (!SA) return nullptr; @@ -2413,6 +2457,32 @@ static const APInt *getValidMinimumShiftAmountConstant(SDValue V) { return MinShAmt; } +/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less +/// than the element bit-width of the shift node, return the maximum value. +static const APInt * +getValidMaximumShiftAmountConstant(SDValue V, const APInt &DemandedElts) { + unsigned BitWidth = V.getScalarValueSizeInBits(); + auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1)); + if (!BV) + return nullptr; + const APInt *MaxShAmt = nullptr; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + if (!DemandedElts[i]) + continue; + auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i)); + if (!SA) + return nullptr; + // Shifting more than the bitwidth is not valid. + const APInt &ShAmt = SA->getAPIntValue(); + if (ShAmt.uge(BitWidth)) + return nullptr; + if (MaxShAmt && MaxShAmt->uge(ShAmt)) + continue; + MaxShAmt = &ShAmt; + } + return MaxShAmt; +} + /// Determine which bits of Op are known to be either zero or one and return /// them in Known. For vectors, the known bits are those that are shared by /// every vector element. @@ -2784,37 +2854,60 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero.setBitsFrom(1); break; case ISD::SETCC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: { + unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0; // If we know the result of a setcc has the top bits zero, use this info. - if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == + if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) Known.Zero.setBitsFrom(1); break; + } case ISD::SHL: - if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { - Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + + if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) { unsigned Shift = ShAmt->getZExtValue(); Known.Zero <<= Shift; Known.One <<= Shift; // Low bits are known zero. Known.Zero.setLowBits(Shift); + break; } + + // No matter the shift amount, the trailing zeros will stay zero. + Known.Zero = APInt::getLowBitsSet(BitWidth, Known.countMinTrailingZeros()); + Known.One.clearAllBits(); + + // Minimum shift low bits are known zero. + if (const APInt *ShMinAmt = + getValidMinimumShiftAmountConstant(Op, DemandedElts)) + Known.Zero.setLowBits(ShMinAmt->getZExtValue()); break; case ISD::SRL: - if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { - Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + + if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) { unsigned Shift = ShAmt->getZExtValue(); Known.Zero.lshrInPlace(Shift); Known.One.lshrInPlace(Shift); // High bits are known zero. Known.Zero.setHighBits(Shift); - } else if (const APInt *ShMinAmt = getValidMinimumShiftAmountConstant(Op)) { - // Minimum shift high bits are known zero. - Known.Zero.setHighBits(ShMinAmt->getZExtValue()); + break; } + + // No matter the shift amount, the leading zeros will stay zero. + Known.Zero = APInt::getHighBitsSet(BitWidth, Known.countMinLeadingZeros()); + Known.One.clearAllBits(); + + // Minimum shift high bits are known zero. + if (const APInt *ShMinAmt = + getValidMinimumShiftAmountConstant(Op, DemandedElts)) + Known.Zero.setHighBits(ShMinAmt->getZExtValue()); break; case ISD::SRA: - if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { + if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); unsigned Shift = ShAmt->getZExtValue(); // Sign extend known zero/one bit (else is unknown). @@ -3336,20 +3429,20 @@ SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, KnownBits N0Known = computeKnownBits(N0); bool overflow; - (void)(~N0Known.Zero).uadd_ov(~N1Known.Zero, overflow); + (void)N0Known.getMaxValue().uadd_ov(N1Known.getMaxValue(), overflow); if (!overflow) return OFK_Never; } // mulhi + 1 never overflow if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 && - (~N1Known.Zero & 0x01) == ~N1Known.Zero) + (N1Known.getMaxValue() & 0x01) == N1Known.getMaxValue()) return OFK_Never; if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) { KnownBits N0Known = computeKnownBits(N0); - if ((~N0Known.Zero & 0x01) == ~N0Known.Zero) + if ((N0Known.getMaxValue() & 0x01) == N0Known.getMaxValue()) return OFK_Never; } @@ -3550,25 +3643,26 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp = VTBits - SrcVT.getScalarSizeInBits(); return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp; } - case ISD::SRA: - Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); - // SRA X, C -> adds C sign bits. - if (ConstantSDNode *C = - isConstOrConstSplat(Op.getOperand(1), DemandedElts)) { - APInt ShiftVal = C->getAPIntValue(); - ShiftVal += Tmp; - Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue(); - } + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + // SRA X, C -> adds C sign bits. + if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) + Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits); + else if (const APInt *ShAmt = + getValidMinimumShiftAmountConstant(Op, DemandedElts)) + Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits); return Tmp; case ISD::SHL: - if (ConstantSDNode *C = - isConstOrConstSplat(Op.getOperand(1), DemandedElts)) { - // shl destroys sign bits. - Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); - if (C->getAPIntValue().uge(VTBits) || // Bad shift. - C->getAPIntValue().uge(Tmp)) break; // Shifted all sign bits out. - return Tmp - C->getZExtValue(); + if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) { + // shl destroys sign bits, ensure it doesn't shift out all sign bits. + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + if (ShAmt->ult(Tmp)) + return Tmp - ShAmt->getZExtValue(); + } else if (const APInt *ShAmt = + getValidMaximumShiftAmountConstant(Op, DemandedElts)) { + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + if (ShAmt->ult(Tmp)) + return Tmp - ShAmt->getZExtValue(); } break; case ISD::AND: @@ -3648,11 +3742,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return VTBits; break; case ISD::SETCC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: { + unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0; // If setcc returns 0/-1, all bits are sign bits. - if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == + if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) return VTBits; break; + } case ISD::ROTL: case ISD::ROTR: if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { @@ -4648,11 +4746,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); - // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) && - OpOpcode == ISD::FSUB) - return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), - Operand.getOperand(0), Flags); if (OpOpcode == ISD::FNEG) // --X -> X return Operand.getOperand(0); break; @@ -4689,46 +4782,46 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return V; } -static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1, - const APInt &C2) { +static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1, + const APInt &C2) { switch (Opcode) { - case ISD::ADD: return std::make_pair(C1 + C2, true); - case ISD::SUB: return std::make_pair(C1 - C2, true); - case ISD::MUL: return std::make_pair(C1 * C2, true); - case ISD::AND: return std::make_pair(C1 & C2, true); - case ISD::OR: return std::make_pair(C1 | C2, true); - case ISD::XOR: return std::make_pair(C1 ^ C2, true); - case ISD::SHL: return std::make_pair(C1 << C2, true); - case ISD::SRL: return std::make_pair(C1.lshr(C2), true); - case ISD::SRA: return std::make_pair(C1.ashr(C2), true); - case ISD::ROTL: return std::make_pair(C1.rotl(C2), true); - case ISD::ROTR: return std::make_pair(C1.rotr(C2), true); - case ISD::SMIN: return std::make_pair(C1.sle(C2) ? C1 : C2, true); - case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true); - case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true); - case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true); - case ISD::SADDSAT: return std::make_pair(C1.sadd_sat(C2), true); - case ISD::UADDSAT: return std::make_pair(C1.uadd_sat(C2), true); - case ISD::SSUBSAT: return std::make_pair(C1.ssub_sat(C2), true); - case ISD::USUBSAT: return std::make_pair(C1.usub_sat(C2), true); + case ISD::ADD: return C1 + C2; + case ISD::SUB: return C1 - C2; + case ISD::MUL: return C1 * C2; + case ISD::AND: return C1 & C2; + case ISD::OR: return C1 | C2; + case ISD::XOR: return C1 ^ C2; + case ISD::SHL: return C1 << C2; + case ISD::SRL: return C1.lshr(C2); + case ISD::SRA: return C1.ashr(C2); + case ISD::ROTL: return C1.rotl(C2); + case ISD::ROTR: return C1.rotr(C2); + case ISD::SMIN: return C1.sle(C2) ? C1 : C2; + case ISD::SMAX: return C1.sge(C2) ? C1 : C2; + case ISD::UMIN: return C1.ule(C2) ? C1 : C2; + case ISD::UMAX: return C1.uge(C2) ? C1 : C2; + case ISD::SADDSAT: return C1.sadd_sat(C2); + case ISD::UADDSAT: return C1.uadd_sat(C2); + case ISD::SSUBSAT: return C1.ssub_sat(C2); + case ISD::USUBSAT: return C1.usub_sat(C2); case ISD::UDIV: if (!C2.getBoolValue()) break; - return std::make_pair(C1.udiv(C2), true); + return C1.udiv(C2); case ISD::UREM: if (!C2.getBoolValue()) break; - return std::make_pair(C1.urem(C2), true); + return C1.urem(C2); case ISD::SDIV: if (!C2.getBoolValue()) break; - return std::make_pair(C1.sdiv(C2), true); + return C1.sdiv(C2); case ISD::SREM: if (!C2.getBoolValue()) break; - return std::make_pair(C1.srem(C2), true); + return C1.srem(C2); } - return std::make_pair(APInt(1, 0), false); + return llvm::None; } SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, @@ -4736,12 +4829,10 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, const ConstantSDNode *C2) { if (C1->isOpaque() || C2->isOpaque()) return SDValue(); - - std::pair<APInt, bool> Folded = FoldValue(Opcode, C1->getAPIntValue(), - C2->getAPIntValue()); - if (!Folded.second) - return SDValue(); - return getConstant(Folded.first, DL, VT); + if (Optional<APInt> Folded = + FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue())) + return getConstant(Folded.getValue(), DL, VT); + return SDValue(); } SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT, @@ -5228,8 +5319,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "The result of EXTRACT_VECTOR_ELT must be at least as wide as the \ element type of the vector."); - // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF. - if (N1.isUndef()) + // Extract from an undefined value or using an undefined index is undefined. + if (N1.isUndef() || N2.isUndef()) return getUNDEF(VT); // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF @@ -5506,6 +5597,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements()) return getUNDEF(VT); + + // Undefined index can be assumed out-of-bounds, so that's UNDEF too. + if (N3.isUndef()) + return getUNDEF(VT); + + // If the inserted element is an UNDEF, just use the input vector. + if (N2.isUndef()) + return N1; + break; } case ISD::INSERT_SUBVECTOR: { @@ -5697,10 +5797,19 @@ static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG, return SDValue(nullptr, 0); } -SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, unsigned Offset, - const SDLoc &DL) { +SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, int64_t Offset, + const SDLoc &DL, + const SDNodeFlags Flags) { EVT VT = Base.getValueType(); - return getNode(ISD::ADD, DL, VT, Base, getConstant(Offset, DL, VT)); + return getMemBasePlusOffset(Base, getConstant(Offset, DL, VT), DL, Flags); +} + +SDValue SelectionDAG::getMemBasePlusOffset(SDValue Ptr, SDValue Offset, + const SDLoc &DL, + const SDNodeFlags Flags) { + assert(Offset.getValueType().isInteger()); + EVT BasePtrVT = Ptr.getValueType(); + return getNode(ISD::ADD, DL, BasePtrVT, Ptr, Offset, Flags); } /// Returns true if memcpy source is constant data. @@ -5722,12 +5831,13 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) { SrcDelta + G->getOffset()); } -static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { +static bool shouldLowerMemFuncForSize(const MachineFunction &MF, + SelectionDAG &DAG) { // On Darwin, -Os means optimize for size without hurting performance, so // only really optimize for size when -Oz (MinSize) is used. if (MF.getTarget().getTargetTriple().isOSDarwin()) return MF.getFunction().hasMinSize(); - return MF.getFunction().hasOptSize(); + return DAG.shouldOptForSize(); } static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, @@ -5777,7 +5887,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - bool OptSize = shouldLowerMemFuncForSize(MF); + bool OptSize = shouldLowerMemFuncForSize(MF, DAG); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -5960,7 +6070,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - bool OptSize = shouldLowerMemFuncForSize(MF); + bool OptSize = shouldLowerMemFuncForSize(MF, DAG); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -6066,7 +6176,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - bool OptSize = shouldLowerMemFuncForSize(MF); + bool OptSize = shouldLowerMemFuncForSize(MF, DAG); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -6557,7 +6667,9 @@ SDValue SelectionDAG::getMemIntrinsicNode( if (Align == 0) // Ensure that codegen never sees alignment 0 Align = getEVTAlignment(MemVT); - if (!Size) + if (!Size && MemVT.isScalableVector()) + Size = MemoryLocation::UnknownSize; + else if (!Size) Size = MemVT.getStoreSize(); MachineFunction &MF = getMachineFunction(); @@ -6951,16 +7063,22 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, } SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, - SDValue Ptr, SDValue Mask, SDValue PassThru, - EVT MemVT, MachineMemOperand *MMO, + SDValue Base, SDValue Offset, SDValue Mask, + SDValue PassThru, EVT MemVT, + MachineMemOperand *MMO, + ISD::MemIndexedMode AM, ISD::LoadExtType ExtTy, bool isExpanding) { - SDVTList VTs = getVTList(VT, MVT::Other); - SDValue Ops[] = { Chain, Ptr, Mask, PassThru }; + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.isUndef()) && + "Unindexed masked load with an offset!"); + SDVTList VTs = Indexed ? getVTList(VT, Base.getValueType(), MVT::Other) + : getVTList(VT, MVT::Other); + SDValue Ops[] = {Chain, Base, Offset, Mask, PassThru}; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>( - dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO)); + dl.getIROrder(), VTs, AM, ExtTy, isExpanding, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -6968,7 +7086,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, return SDValue(E, 0); } auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, - ExtTy, isExpanding, MemVT, MMO); + AM, ExtTy, isExpanding, MemVT, MMO); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -6978,27 +7096,45 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, return V; } +SDValue SelectionDAG::getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { + MaskedLoadSDNode *LD = cast<MaskedLoadSDNode>(OrigLoad); + assert(LD->getOffset().isUndef() && "Masked load is already a indexed load!"); + return getMaskedLoad(OrigLoad.getValueType(), dl, LD->getChain(), Base, + Offset, LD->getMask(), LD->getPassThru(), + LD->getMemoryVT(), LD->getMemOperand(), AM, + LD->getExtensionType(), LD->isExpandingLoad()); +} + SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, - SDValue Val, SDValue Ptr, SDValue Mask, - EVT MemVT, MachineMemOperand *MMO, - bool IsTruncating, bool IsCompressing) { + SDValue Val, SDValue Base, SDValue Offset, + SDValue Mask, EVT MemVT, + MachineMemOperand *MMO, + ISD::MemIndexedMode AM, bool IsTruncating, + bool IsCompressing) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - SDVTList VTs = getVTList(MVT::Other); - SDValue Ops[] = { Chain, Val, Ptr, Mask }; + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.isUndef()) && + "Unindexed masked store with an offset!"); + SDVTList VTs = Indexed ? getVTList(Base.getValueType(), MVT::Other) + : getVTList(MVT::Other); + SDValue Ops[] = {Chain, Val, Base, Offset, Mask}; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>( - dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO)); + dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<MaskedStoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, - IsTruncating, IsCompressing, MemVT, MMO); + auto *N = + newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, + IsTruncating, IsCompressing, MemVT, MMO); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -7008,6 +7144,17 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, return V; } +SDValue SelectionDAG::getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { + MaskedStoreSDNode *ST = cast<MaskedStoreSDNode>(OrigStore); + assert(ST->getOffset().isUndef() && + "Masked store is already a indexed store!"); + return getMaskedStore(ST->getChain(), dl, ST->getValue(), Base, Offset, + ST->getMask(), ST->getMemoryVT(), ST->getMemOperand(), + AM, ST->isTruncatingStore(), ST->isCompressingStore()); +} + SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO, @@ -7263,8 +7410,40 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, if (VTList.NumVTs == 1) return getNode(Opcode, DL, VTList.VTs[0], Ops); -#if 0 switch (Opcode) { + case ISD::STRICT_FP_EXTEND: + assert(VTList.NumVTs == 2 && Ops.size() == 2 && + "Invalid STRICT_FP_EXTEND!"); + assert(VTList.VTs[0].isFloatingPoint() && + Ops[1].getValueType().isFloatingPoint() && "Invalid FP cast!"); + assert(VTList.VTs[0].isVector() == Ops[1].getValueType().isVector() && + "STRICT_FP_EXTEND result type should be vector iff the operand " + "type is vector!"); + assert((!VTList.VTs[0].isVector() || + VTList.VTs[0].getVectorNumElements() == + Ops[1].getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + assert(Ops[1].getValueType().bitsLT(VTList.VTs[0]) && + "Invalid fpext node, dst <= src!"); + break; + case ISD::STRICT_FP_ROUND: + assert(VTList.NumVTs == 2 && Ops.size() == 3 && "Invalid STRICT_FP_ROUND!"); + assert(VTList.VTs[0].isVector() == Ops[1].getValueType().isVector() && + "STRICT_FP_ROUND result type should be vector iff the operand " + "type is vector!"); + assert((!VTList.VTs[0].isVector() || + VTList.VTs[0].getVectorNumElements() == + Ops[1].getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + assert(VTList.VTs[0].isFloatingPoint() && + Ops[1].getValueType().isFloatingPoint() && + VTList.VTs[0].bitsLT(Ops[1].getValueType()) && + isa<ConstantSDNode>(Ops[2]) && + (cast<ConstantSDNode>(Ops[2])->getZExtValue() == 0 || + cast<ConstantSDNode>(Ops[2])->getZExtValue() == 1) && + "Invalid STRICT_FP_ROUND!"); + break; +#if 0 // FIXME: figure out how to safely handle things like // int foo(int x) { return 1 << (x & 255); } // int bar() { return foo(256); } @@ -7283,8 +7462,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); } break; - } #endif + } // Memoize the node unless it returns a flag. SDNode *N; @@ -7740,38 +7919,11 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { switch (OrigOpc) { default: llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!"); - case ISD::STRICT_FADD: NewOpc = ISD::FADD; break; - case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break; - case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break; - case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break; - case ISD::STRICT_FREM: NewOpc = ISD::FREM; break; - case ISD::STRICT_FMA: NewOpc = ISD::FMA; break; - case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; break; - case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break; - case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break; - case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; break; - case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; break; - case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; break; - case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; break; - case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break; - case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break; - case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break; - case ISD::STRICT_LRINT: NewOpc = ISD::LRINT; break; - case ISD::STRICT_LLRINT: NewOpc = ISD::LLRINT; break; - case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break; - case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break; - case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break; - case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break; - case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break; - case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break; - case ISD::STRICT_LROUND: NewOpc = ISD::LROUND; break; - case ISD::STRICT_LLROUND: NewOpc = ISD::LLROUND; break; - case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break; - case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break; - case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break; - case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break; - case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break; - case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break; +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: NewOpc = ISD::DAGN; break; +#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: NewOpc = ISD::SETCC; break; +#include "llvm/IR/ConstrainedOps.def" } assert(Node->getNumValues() == 2 && "Unexpected number of results!"); @@ -8051,9 +8203,9 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To, Expr = *Fragment; } // Clone the SDDbgValue and move it to To. - SDDbgValue *Clone = - getDbgValue(Var, Expr, ToNode, To.getResNo(), Dbg->isIndirect(), - Dbg->getDebugLoc(), Dbg->getOrder()); + SDDbgValue *Clone = getDbgValue( + Var, Expr, ToNode, To.getResNo(), Dbg->isIndirect(), Dbg->getDebugLoc(), + std::max(ToNode->getIROrder(), Dbg->getOrder())); ClonedDVs.push_back(Clone); if (InvalidateDbg) { @@ -8831,7 +8983,9 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, // We check here that the size of the memory operand fits within the size of // the MMO. This is because the MMO might indicate only a possible address // range instead of specifying the affected memory addresses precisely. - assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!"); + // TODO: Make MachineMemOperands aware of scalable vectors. + assert(memvt.getStoreSize().getKnownMinSize() <= MMO->getSize() && + "Size mismatch!"); } /// Profile - Gather unique data for the node. @@ -9245,11 +9399,11 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, /// it cannot be inferred. unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { // If this is a GlobalAddress + cst, return the alignment. - const GlobalValue *GV; + const GlobalValue *GV = nullptr; int64_t GVOffset = 0; if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - unsigned IdxWidth = getDataLayout().getIndexTypeSizeInBits(GV->getType()); - KnownBits Known(IdxWidth); + unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); + KnownBits Known(PtrWidth); llvm::computeKnownBits(GV, Known, getDataLayout()); unsigned AlignBits = Known.countMinTrailingZeros(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 8c15563fcd23..728d963a916f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -27,11 +27,13 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" @@ -84,6 +86,8 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAArch64.h" +#include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -722,7 +726,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, unsigned IntermediateNumElts = IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1; - // Convert the vector to the appropiate type if necessary. + // Convert the vector to the appropriate type if necessary. unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts; EVT BuiltVectorTy = EVT::getVectorVT( @@ -1021,6 +1025,8 @@ void SelectionDAGBuilder::clear() { UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); + PendingConstrainedFP.clear(); + PendingConstrainedFPStrict.clear(); CurInst = nullptr; HasTailCall = false; SDNodeOrder = LowestSDNodeOrder; @@ -1031,50 +1037,66 @@ void SelectionDAGBuilder::clearDanglingDebugInfo() { DanglingDebugInfoMap.clear(); } -SDValue SelectionDAGBuilder::getRoot() { - if (PendingLoads.empty()) - return DAG.getRoot(); - - if (PendingLoads.size() == 1) { - SDValue Root = PendingLoads[0]; - DAG.setRoot(Root); - PendingLoads.clear(); - return Root; - } - - // Otherwise, we have to make a token factor node. - SDValue Root = DAG.getTokenFactor(getCurSDLoc(), PendingLoads); - PendingLoads.clear(); - DAG.setRoot(Root); - return Root; -} - -SDValue SelectionDAGBuilder::getControlRoot() { +// Update DAG root to include dependencies on Pending chains. +SDValue SelectionDAGBuilder::updateRoot(SmallVectorImpl<SDValue> &Pending) { SDValue Root = DAG.getRoot(); - if (PendingExports.empty()) + if (Pending.empty()) return Root; - // Turn all of the CopyToReg chains into one factored node. + // Add current root to PendingChains, unless we already indirectly + // depend on it. if (Root.getOpcode() != ISD::EntryToken) { - unsigned i = 0, e = PendingExports.size(); + unsigned i = 0, e = Pending.size(); for (; i != e; ++i) { - assert(PendingExports[i].getNode()->getNumOperands() > 1); - if (PendingExports[i].getNode()->getOperand(0) == Root) + assert(Pending[i].getNode()->getNumOperands() > 1); + if (Pending[i].getNode()->getOperand(0) == Root) break; // Don't add the root if we already indirectly depend on it. } if (i == e) - PendingExports.push_back(Root); + Pending.push_back(Root); } - Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, - PendingExports); - PendingExports.clear(); + if (Pending.size() == 1) + Root = Pending[0]; + else + Root = DAG.getTokenFactor(getCurSDLoc(), Pending); + DAG.setRoot(Root); + Pending.clear(); return Root; } +SDValue SelectionDAGBuilder::getMemoryRoot() { + return updateRoot(PendingLoads); +} + +SDValue SelectionDAGBuilder::getRoot() { + // Chain up all pending constrained intrinsics together with all + // pending loads, by simply appending them to PendingLoads and + // then calling getMemoryRoot(). + PendingLoads.reserve(PendingLoads.size() + + PendingConstrainedFP.size() + + PendingConstrainedFPStrict.size()); + PendingLoads.append(PendingConstrainedFP.begin(), + PendingConstrainedFP.end()); + PendingLoads.append(PendingConstrainedFPStrict.begin(), + PendingConstrainedFPStrict.end()); + PendingConstrainedFP.clear(); + PendingConstrainedFPStrict.clear(); + return getMemoryRoot(); +} + +SDValue SelectionDAGBuilder::getControlRoot() { + // We need to emit pending fpexcept.strict constrained intrinsics, + // so append them to the PendingExports list. + PendingExports.append(PendingConstrainedFPStrict.begin(), + PendingConstrainedFPStrict.end()); + PendingConstrainedFPStrict.clear(); + return updateRoot(PendingExports); +} + void SelectionDAGBuilder::visit(const Instruction &I) { // Set up outgoing PHI node register values before emitting the terminator. if (I.isTerminator()) { @@ -1104,6 +1126,15 @@ void SelectionDAGBuilder::visit(const Instruction &I) { Node->intersectFlagsWith(IncomingFlags); } } + // Constrained FP intrinsics with fpexcept.ignore should also get + // the NoFPExcept flag. + if (auto *FPI = dyn_cast<ConstrainedFPIntrinsic>(&I)) + if (FPI->getExceptionBehavior() == fp::ExceptionBehavior::ebIgnore) + if (SDNode *Node = getNodeForIRValue(&I)) { + SDNodeFlags Flags = Node->getFlags(); + Flags.setNoFPExcept(true); + Node->setFlags(Flags); + } if (!I.isTerminator() && !HasTailCall && !isStatepoint(&I)) // statepoints handle their exports internally @@ -2746,8 +2777,9 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. - assert(!I.hasOperandBundlesOtherThan( - {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && + assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt, + LLVMContext::OB_funclet, + LLVMContext::OB_cfguardtarget}) && "Cannot lower invokes with arbitrary operand bundles yet!"); const Value *Callee(I.getCalledValue()); @@ -3033,7 +3065,7 @@ static bool isVectorReductionOp(const User *I) { if (!Visited.insert(User).second) continue; - for (const auto &U : User->users()) { + for (const auto *U : User->users()) { auto Inst = dyn_cast<Instruction>(U); if (!Inst) return false; @@ -3119,6 +3151,13 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) { if (isVectorReductionOp(&I)) { Flags.setVectorReduction(true); LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n"); + + // If no flags are set we will propagate the incoming flags, if any flags + // are set, we will intersect them with the incoming flag and so we need to + // copy the FMF flags here. + if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) { + Flags.copyFMF(*FPOp); + } } SDValue Op1 = getValue(I.getOperand(0)); @@ -4039,9 +4078,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue Root; bool ConstantMemory = false; - if (isVolatile || NumValues > MaxParallelChains) + if (isVolatile) // Serialize volatile loads with other side effects. Root = getRoot(); + else if (NumValues > MaxParallelChains) + Root = getMemoryRoot(); else if (AA && AA->pointsToConstantMemory(MemoryLocation( SV, @@ -4216,10 +4257,9 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SDValue Src = getValue(SrcV); SDValue Ptr = getValue(PtrV); - SDValue Root = getRoot(); + SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot(); SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); SDLoc dl = getCurSDLoc(); - EVT PtrVT = Ptr.getValueType(); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); @@ -4245,8 +4285,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { Root = Chain; ChainI = 0; } - SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(Offsets[i], dl, PtrVT), Flags); + SDValue Add = DAG.getMemBasePlusOffset(Ptr, Offsets[i], dl, Flags); SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]); @@ -4292,6 +4331,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(Src0Operand); SDValue Mask = getValue(MaskOperand); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT VT = Src0.getValueType(); if (!Alignment) @@ -4303,11 +4343,14 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(PtrOperand), - MachineMemOperand::MOStore, VT.getStoreSize(), + MachineMemOperand::MOStore, + // TODO: Make MachineMemOperands aware of scalable + // vectors. + VT.getStoreSize().getKnownMinSize(), Alignment, AAInfo); - SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT, - MMO, false /* Truncating */, - IsCompressing); + SDValue StoreNode = + DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO, + ISD::UNINDEXED, false /* Truncating */, IsCompressing); DAG.setRoot(StoreNode); setValue(&I, StoreNode); } @@ -4346,9 +4389,10 @@ static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index, unsigned FinalIndex = GEP->getNumOperands() - 1; Value *IndexVal = GEP->getOperand(FinalIndex); + gep_type_iterator GTI = gep_type_begin(*GEP); // Ensure all the other indices are 0. - for (unsigned i = 1; i < FinalIndex; ++i) { + for (unsigned i = 1; i < FinalIndex; ++i, ++GTI) { auto *C = dyn_cast<Constant>(GEP->getOperand(i)); if (!C) return false; @@ -4361,18 +4405,39 @@ static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index, // The operands of the GEP may be defined in another basic block. // In this case we'll not find nodes for the operands. - if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal)) + if (!SDB->findValue(Ptr)) + return false; + Constant *C = dyn_cast<Constant>(IndexVal); + if (!C && !SDB->findValue(IndexVal)) return false; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const DataLayout &DL = DAG.getDataLayout(); - Scale = DAG.getTargetConstant(DL.getTypeAllocSize(GEP->getResultElementType()), - SDB->getCurSDLoc(), TLI.getPointerTy(DL)); + StructType *STy = GTI.getStructTypeOrNull(); + + if (STy) { + const StructLayout *SL = DL.getStructLayout(STy); + if (isa<VectorType>(C->getType())) { + C = C->getSplatValue(); + // FIXME: If getSplatValue may return nullptr for a structure? + // If not, the following check can be removed. + if (!C) + return false; + } + auto *CI = cast<ConstantInt>(C); + Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); + Index = DAG.getConstant(SL->getElementOffset(CI->getZExtValue()), + SDB->getCurSDLoc(), TLI.getPointerTy(DL)); + } else { + Scale = DAG.getTargetConstant( + DL.getTypeAllocSize(GEP->getResultElementType()), + SDB->getCurSDLoc(), TLI.getPointerTy(DL)); + Index = SDB->getValue(IndexVal); + } Base = SDB->getValue(Ptr); - Index = SDB->getValue(IndexVal); IndexType = ISD::SIGNED_SCALED; - if (!Index.getValueType().isVector()) { + if (STy || !Index.getValueType().isVector()) { unsigned GEPWidth = GEP->getType()->getVectorNumElements(); EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index); @@ -4383,7 +4448,7 @@ static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index, void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDLoc sdl = getCurSDLoc(); - // llvm.masked.scatter.*(Src0, Ptrs, alignemt, Mask) + // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask) const Value *Ptr = I.getArgOperand(1); SDValue Src0 = getValue(I.getArgOperand(0)); SDValue Mask = getValue(I.getArgOperand(3)); @@ -4407,7 +4472,10 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), - MachineMemOperand::MOStore, VT.getStoreSize(), + MachineMemOperand::MOStore, + // TODO: Make MachineMemOperands aware of scalable + // vectors. + VT.getStoreSize().getKnownMinSize(), Alignment, AAInfo); if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); @@ -4415,7 +4483,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } - SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale }; + SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale }; SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, Ops, MMO, IndexType); DAG.setRoot(Scatter); @@ -4452,6 +4520,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(Src0Operand); SDValue Mask = getValue(MaskOperand); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT VT = Src0.getValueType(); if (!Alignment) @@ -4462,22 +4531,29 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); // Do not serialize masked loads of constant memory with anything. - bool AddToChain = - !AA || !AA->pointsToConstantMemory(MemoryLocation( - PtrOperand, - LocationSize::precise( - DAG.getDataLayout().getTypeStoreSize(I.getType())), - AAInfo)); + MemoryLocation ML; + if (VT.isScalableVector()) + ML = MemoryLocation(PtrOperand); + else + ML = MemoryLocation(PtrOperand, LocationSize::precise( + DAG.getDataLayout().getTypeStoreSize(I.getType())), + AAInfo); + bool AddToChain = !AA || !AA->pointsToConstantMemory(ML); + SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(PtrOperand), - MachineMemOperand::MOLoad, VT.getStoreSize(), + MachineMemOperand::MOLoad, + // TODO: Make MachineMemOperands aware of scalable + // vectors. + VT.getStoreSize().getKnownMinSize(), Alignment, AAInfo, Ranges); - SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, - ISD::NON_EXTLOAD, IsExpanding); + SDValue Load = + DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO, + ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding); if (AddToChain) PendingLoads.push_back(Load.getValue(1)); setValue(&I, Load); @@ -4524,7 +4600,10 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr), - MachineMemOperand::MOLoad, VT.getStoreSize(), + MachineMemOperand::MOLoad, + // TODO: Make MachineMemOperands aware of scalable + // vectors. + VT.getStoreSize().getKnownMinSize(), Alignment, AAInfo, Ranges); if (!UniformBase) { @@ -4634,10 +4713,10 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Ops[3]; Ops[0] = getRoot(); - Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl, - TLI.getFenceOperandTy(DAG.getDataLayout())); - Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl, - TLI.getFenceOperandTy(DAG.getDataLayout())); + Ops[1] = DAG.getTargetConstant((unsigned)I.getOrdering(), dl, + TLI.getFenceOperandTy(DAG.getDataLayout())); + Ops[2] = DAG.getTargetConstant(I.getSyncScopeID(), dl, + TLI.getFenceOperandTy(DAG.getDataLayout())); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -5344,8 +5423,8 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, if (Val == 0) return DAG.getConstantFP(1.0, DL, LHS.getValueType()); - const Function &F = DAG.getMachineFunction().getFunction(); - if (!F.hasOptSize() || + bool OptForSize = DAG.shouldOptForSize(); + if (!OptForSize || // If optimizing for size, don't insert too many multiplies. // This inserts up to 5 multiplies. countPopulation(Val) + Log2_32(Val) < 7) { @@ -5382,6 +5461,60 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } +static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL, + SDValue LHS, SDValue RHS, SDValue Scale, + SelectionDAG &DAG, const TargetLowering &TLI) { + EVT VT = LHS.getValueType(); + bool Signed = Opcode == ISD::SDIVFIX; + LLVMContext &Ctx = *DAG.getContext(); + + // If the type is legal but the operation isn't, this node might survive all + // the way to operation legalization. If we end up there and we do not have + // the ability to widen the type (if VT*2 is not legal), we cannot expand the + // node. + + // Coax the legalizer into expanding the node during type legalization instead + // by bumping the size by one bit. This will force it to Promote, enabling the + // early expansion and avoiding the need to expand later. + + // We don't have to do this if Scale is 0; that can always be expanded. + + // FIXME: We wouldn't have to do this (or any of the early + // expansion/promotion) if it was possible to expand a libcall of an + // illegal type during operation legalization. But it's not, so things + // get a bit hacky. + unsigned ScaleInt = cast<ConstantSDNode>(Scale)->getZExtValue(); + if (ScaleInt > 0 && + (TLI.isTypeLegal(VT) || + (VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) { + TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction( + Opcode, VT, ScaleInt); + if (Action != TargetLowering::Legal && Action != TargetLowering::Custom) { + EVT PromVT; + if (VT.isScalarInteger()) + PromVT = EVT::getIntegerVT(Ctx, VT.getSizeInBits() + 1); + else if (VT.isVector()) { + PromVT = VT.getVectorElementType(); + PromVT = EVT::getIntegerVT(Ctx, PromVT.getSizeInBits() + 1); + PromVT = EVT::getVectorVT(Ctx, PromVT, VT.getVectorElementCount()); + } else + llvm_unreachable("Wrong VT for DIVFIX?"); + if (Signed) { + LHS = DAG.getSExtOrTrunc(LHS, DL, PromVT); + RHS = DAG.getSExtOrTrunc(RHS, DL, PromVT); + } else { + LHS = DAG.getZExtOrTrunc(LHS, DL, PromVT); + RHS = DAG.getZExtOrTrunc(RHS, DL, PromVT); + } + // TODO: Saturation. + SDValue Res = DAG.getNode(Opcode, DL, PromVT, LHS, RHS, Scale); + return DAG.getZExtOrTrunc(Res, DL, VT); + } + } + + return DAG.getNode(Opcode, DL, VT, LHS, RHS, Scale); +} + // getUnderlyingArgRegs - Find underlying registers used for a truncated, // bitcasted, or split argument. Returns a list of <Register, size in bits> static void @@ -5474,7 +5607,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( // is an argument. But since we already has used %a1 to describe a parameter // we should not handle that last dbg.value here (that would result in an // incorrect hoisting of the DBG_VALUE to the function entry). - // Notice that we allow one dbg.value per IR level argument, to accomodate + // Notice that we allow one dbg.value per IR level argument, to accommodate // for the situation with fragments above. if (VariableIsFunctionInputArg) { unsigned ArgNo = Arg->getArgNo(); @@ -5489,7 +5622,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); - bool IsIndirect = false; Optional<MachineOperand> Op; // Some arguments' frame index is recorded during argument lowering. int FI = FuncInfo.getArgumentFrameIndex(Arg); @@ -5511,7 +5643,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } if (Reg) { Op = MachineOperand::CreateReg(Reg, false); - IsIndirect = IsDbgDeclare; } } @@ -5530,15 +5661,38 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( = [&](ArrayRef<std::pair<unsigned, unsigned>> SplitRegs) { unsigned Offset = 0; for (auto RegAndSize : SplitRegs) { + // If the expression is already a fragment, the current register + // offset+size might extend beyond the fragment. In this case, only + // the register bits that are inside the fragment are relevant. + int RegFragmentSizeInBits = RegAndSize.second; + if (auto ExprFragmentInfo = Expr->getFragmentInfo()) { + uint64_t ExprFragmentSizeInBits = ExprFragmentInfo->SizeInBits; + // The register is entirely outside the expression fragment, + // so is irrelevant for debug info. + if (Offset >= ExprFragmentSizeInBits) + break; + // The register is partially outside the expression fragment, only + // the low bits within the fragment are relevant for debug info. + if (Offset + RegFragmentSizeInBits > ExprFragmentSizeInBits) { + RegFragmentSizeInBits = ExprFragmentSizeInBits - Offset; + } + } + auto FragmentExpr = DIExpression::createFragmentExpression( - Expr, Offset, RegAndSize.second); - if (!FragmentExpr) + Expr, Offset, RegFragmentSizeInBits); + Offset += RegAndSize.second; + // If a valid fragment expression cannot be created, the variable's + // correct value cannot be determined and so it is set as Undef. + if (!FragmentExpr) { + SDDbgValue *SDV = DAG.getConstantDbgValue( + Variable, Expr, UndefValue::get(V->getType()), DL, SDNodeOrder); + DAG.AddDbgValue(SDV, nullptr, false); continue; + } assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?"); FuncInfo.ArgDbgValues.push_back( BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, RegAndSize.first, Variable, *FragmentExpr)); - Offset += RegAndSize.second; } }; @@ -5555,7 +5709,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } Op = MachineOperand::CreateReg(VMI->second, false); - IsIndirect = IsDbgDeclare; } else if (ArgRegsAndSizes.size() > 1) { // This was split due to the calling convention, and no virtual register // mapping exists for the value. @@ -5569,9 +5722,26 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); - IsIndirect = (Op->isReg()) ? IsIndirect : true; - if (IsIndirect) + + // If the argument arrives in a stack slot, then what the IR thought was a + // normal Value is actually in memory, and we must add a deref to load it. + if (Op->isFI()) { + int FI = Op->getIndex(); + unsigned Size = DAG.getMachineFunction().getFrameInfo().getObjectSize(FI); + if (Expr->isImplicit()) { + SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size}; + Expr = DIExpression::prependOpcodes(Expr, Ops); + } else { + Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore); + } + } + + // If this location was specified with a dbg.declare, then it and its + // expression calculate the address of the variable. Append a deref to + // force it to be a memory location. + if (IsDbgDeclare) Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); + FuncInfo.ArgDbgValues.push_back( BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, *Op, Variable, Expr)); @@ -5603,20 +5773,20 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, /*IsIndirect*/ false, dl, DbgSDNodeOrder); } -// VisualStudio defines setjmp as _setjmp -#if defined(_MSC_VER) && defined(setjmp) && \ - !defined(setjmp_undefined_for_msvc) -# pragma push_macro("setjmp") -# undef setjmp -# define setjmp_undefined_for_msvc -#endif - static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) { switch (Intrinsic) { case Intrinsic::smul_fix: return ISD::SMULFIX; case Intrinsic::umul_fix: return ISD::UMULFIX; + case Intrinsic::smul_fix_sat: + return ISD::SMULFIXSAT; + case Intrinsic::umul_fix_sat: + return ISD::UMULFIXSAT; + case Intrinsic::sdiv_fix: + return ISD::SDIVFIX; + case Intrinsic::udiv_fix: + return ISD::UDIVFIX; default: llvm_unreachable("Unhandled fixed point intrinsic"); } @@ -5687,12 +5857,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, RegName, getValue(RegValue))); return; } - case Intrinsic::setjmp: - lowerCallToExternalSymbol(I, &"_setjmp"[!TLI.usesUnderscoreSetJmp()]); - return; - case Intrinsic::longjmp: - lowerCallToExternalSymbol(I, &"_longjmp"[!TLI.usesUnderscoreLongJmp()]); - return; case Intrinsic::memcpy: { const auto &MCI = cast<MemCpyInst>(I); SDValue Op1 = getValue(I.getArgOperand(0)); @@ -5706,7 +5870,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. - SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + SDValue Root = isVol ? getRoot() : getMemoryRoot(); + SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Align, isVol, false, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); @@ -5722,7 +5887,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Align = std::max<unsigned>(MSI.getDestAlignment(), 1); bool isVol = MSI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); - SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + SDValue Root = isVol ? getRoot() : getMemoryRoot(); + SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Align, isVol, isTC, MachinePointerInfo(I.getArgOperand(0))); updateDAGForMaybeTailCall(MS); return; @@ -5740,7 +5906,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memmove DAG // node. - SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + SDValue Root = isVol ? getRoot() : getMemoryRoot(); + SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Align, isVol, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); updateDAGForMaybeTailCall(MM); @@ -6102,44 +6269,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); return; - case Intrinsic::experimental_constrained_fadd: - case Intrinsic::experimental_constrained_fsub: - case Intrinsic::experimental_constrained_fmul: - case Intrinsic::experimental_constrained_fdiv: - case Intrinsic::experimental_constrained_frem: - case Intrinsic::experimental_constrained_fma: - case Intrinsic::experimental_constrained_fptosi: - case Intrinsic::experimental_constrained_fptoui: - case Intrinsic::experimental_constrained_fptrunc: - case Intrinsic::experimental_constrained_fpext: - case Intrinsic::experimental_constrained_sqrt: - case Intrinsic::experimental_constrained_pow: - case Intrinsic::experimental_constrained_powi: - case Intrinsic::experimental_constrained_sin: - case Intrinsic::experimental_constrained_cos: - case Intrinsic::experimental_constrained_exp: - case Intrinsic::experimental_constrained_exp2: - case Intrinsic::experimental_constrained_log: - case Intrinsic::experimental_constrained_log10: - case Intrinsic::experimental_constrained_log2: - case Intrinsic::experimental_constrained_lrint: - case Intrinsic::experimental_constrained_llrint: - case Intrinsic::experimental_constrained_rint: - case Intrinsic::experimental_constrained_nearbyint: - case Intrinsic::experimental_constrained_maxnum: - case Intrinsic::experimental_constrained_minnum: - case Intrinsic::experimental_constrained_ceil: - case Intrinsic::experimental_constrained_floor: - case Intrinsic::experimental_constrained_lround: - case Intrinsic::experimental_constrained_llround: - case Intrinsic::experimental_constrained_round: - case Intrinsic::experimental_constrained_trunc: +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case Intrinsic::INTRINSIC: +#include "llvm/IR/ConstrainedOps.def" visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I)); return; case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && - TLI.isFMAFasterThanFMulAndFAdd(VT)) { + TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) { setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), @@ -6307,7 +6445,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; } case Intrinsic::smul_fix: - case Intrinsic::umul_fix: { + case Intrinsic::umul_fix: + case Intrinsic::smul_fix_sat: + case Intrinsic::umul_fix_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); @@ -6315,20 +6455,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Op1.getValueType(), Op1, Op2, Op3)); return; } - case Intrinsic::smul_fix_sat: { + case Intrinsic::sdiv_fix: + case Intrinsic::udiv_fix: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); - setValue(&I, DAG.getNode(ISD::SMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2, - Op3)); - return; - } - case Intrinsic::umul_fix_sat: { - SDValue Op1 = getValue(I.getArgOperand(0)); - SDValue Op2 = getValue(I.getArgOperand(1)); - SDValue Op3 = getValue(I.getArgOperand(2)); - setValue(&I, DAG.getNode(ISD::UMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2, - Op3)); + setValue(&I, expandDivFix(FixedPointIntrinsicToOpcode(Intrinsic), sdl, + Op1, Op2, Op3, DAG, TLI)); return; } case Intrinsic::stacksave: { @@ -6681,7 +6814,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // Add the offset to the FP. Value *FP = I.getArgOperand(1); SDValue FPVal = getValue(FP); - SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal); + SDValue Add = DAG.getMemBasePlusOffset(FPVal, OffsetVal, sdl); setValue(&I, Add); return; @@ -6876,142 +7009,82 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, void SelectionDAGBuilder::visitConstrainedFPIntrinsic( const ConstrainedFPIntrinsic &FPI) { SDLoc sdl = getCurSDLoc(); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs); + ValueVTs.push_back(MVT::Other); // Out chain + + // We do not need to serialize constrained FP intrinsics against + // each other or against (nonvolatile) loads, so they can be + // chained like loads. + SDValue Chain = DAG.getRoot(); + SmallVector<SDValue, 4> Opers; + Opers.push_back(Chain); + if (FPI.isUnaryOp()) { + Opers.push_back(getValue(FPI.getArgOperand(0))); + } else if (FPI.isTernaryOp()) { + Opers.push_back(getValue(FPI.getArgOperand(0))); + Opers.push_back(getValue(FPI.getArgOperand(1))); + Opers.push_back(getValue(FPI.getArgOperand(2))); + } else { + Opers.push_back(getValue(FPI.getArgOperand(0))); + Opers.push_back(getValue(FPI.getArgOperand(1))); + } + unsigned Opcode; switch (FPI.getIntrinsicID()) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. - case Intrinsic::experimental_constrained_fadd: - Opcode = ISD::STRICT_FADD; - break; - case Intrinsic::experimental_constrained_fsub: - Opcode = ISD::STRICT_FSUB; - break; - case Intrinsic::experimental_constrained_fmul: - Opcode = ISD::STRICT_FMUL; - break; - case Intrinsic::experimental_constrained_fdiv: - Opcode = ISD::STRICT_FDIV; - break; - case Intrinsic::experimental_constrained_frem: - Opcode = ISD::STRICT_FREM; - break; - case Intrinsic::experimental_constrained_fma: - Opcode = ISD::STRICT_FMA; - break; - case Intrinsic::experimental_constrained_fptosi: - Opcode = ISD::STRICT_FP_TO_SINT; - break; - case Intrinsic::experimental_constrained_fptoui: - Opcode = ISD::STRICT_FP_TO_UINT; - break; - case Intrinsic::experimental_constrained_fptrunc: - Opcode = ISD::STRICT_FP_ROUND; - break; - case Intrinsic::experimental_constrained_fpext: - Opcode = ISD::STRICT_FP_EXTEND; - break; - case Intrinsic::experimental_constrained_sqrt: - Opcode = ISD::STRICT_FSQRT; - break; - case Intrinsic::experimental_constrained_pow: - Opcode = ISD::STRICT_FPOW; - break; - case Intrinsic::experimental_constrained_powi: - Opcode = ISD::STRICT_FPOWI; - break; - case Intrinsic::experimental_constrained_sin: - Opcode = ISD::STRICT_FSIN; +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case Intrinsic::INTRINSIC: \ + Opcode = ISD::STRICT_##DAGN; \ break; - case Intrinsic::experimental_constrained_cos: - Opcode = ISD::STRICT_FCOS; - break; - case Intrinsic::experimental_constrained_exp: - Opcode = ISD::STRICT_FEXP; - break; - case Intrinsic::experimental_constrained_exp2: - Opcode = ISD::STRICT_FEXP2; - break; - case Intrinsic::experimental_constrained_log: - Opcode = ISD::STRICT_FLOG; - break; - case Intrinsic::experimental_constrained_log10: - Opcode = ISD::STRICT_FLOG10; - break; - case Intrinsic::experimental_constrained_log2: - Opcode = ISD::STRICT_FLOG2; - break; - case Intrinsic::experimental_constrained_lrint: - Opcode = ISD::STRICT_LRINT; - break; - case Intrinsic::experimental_constrained_llrint: - Opcode = ISD::STRICT_LLRINT; - break; - case Intrinsic::experimental_constrained_rint: - Opcode = ISD::STRICT_FRINT; - break; - case Intrinsic::experimental_constrained_nearbyint: - Opcode = ISD::STRICT_FNEARBYINT; - break; - case Intrinsic::experimental_constrained_maxnum: - Opcode = ISD::STRICT_FMAXNUM; - break; - case Intrinsic::experimental_constrained_minnum: - Opcode = ISD::STRICT_FMINNUM; - break; - case Intrinsic::experimental_constrained_ceil: - Opcode = ISD::STRICT_FCEIL; - break; - case Intrinsic::experimental_constrained_floor: - Opcode = ISD::STRICT_FFLOOR; - break; - case Intrinsic::experimental_constrained_lround: - Opcode = ISD::STRICT_LROUND; - break; - case Intrinsic::experimental_constrained_llround: - Opcode = ISD::STRICT_LLROUND; - break; - case Intrinsic::experimental_constrained_round: - Opcode = ISD::STRICT_FROUND; +#include "llvm/IR/ConstrainedOps.def" + } + + // A few strict DAG nodes carry additional operands that are not + // set up by the default code above. + switch (Opcode) { + default: break; + case ISD::STRICT_FP_ROUND: + Opers.push_back( + DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()))); break; - case Intrinsic::experimental_constrained_trunc: - Opcode = ISD::STRICT_FTRUNC; + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: { + auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(&FPI); + Opers.push_back(DAG.getCondCode(getFCmpCondCode(FPCmp->getPredicate()))); break; } - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue Chain = getRoot(); - SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs); - ValueVTs.push_back(MVT::Other); // Out chain + } SDVTList VTs = DAG.getVTList(ValueVTs); - SDValue Result; - if (Opcode == ISD::STRICT_FP_ROUND) - Result = DAG.getNode(Opcode, sdl, VTs, - { Chain, getValue(FPI.getArgOperand(0)), - DAG.getTargetConstant(0, sdl, - TLI.getPointerTy(DAG.getDataLayout())) }); - else if (FPI.isUnaryOp()) - Result = DAG.getNode(Opcode, sdl, VTs, - { Chain, getValue(FPI.getArgOperand(0)) }); - else if (FPI.isTernaryOp()) - Result = DAG.getNode(Opcode, sdl, VTs, - { Chain, getValue(FPI.getArgOperand(0)), - getValue(FPI.getArgOperand(1)), - getValue(FPI.getArgOperand(2)) }); - else - Result = DAG.getNode(Opcode, sdl, VTs, - { Chain, getValue(FPI.getArgOperand(0)), - getValue(FPI.getArgOperand(1)) }); - - if (FPI.getExceptionBehavior() != - ConstrainedFPIntrinsic::ExceptionBehavior::ebIgnore) { - SDNodeFlags Flags; - Flags.setFPExcept(true); - Result->setFlags(Flags); - } + SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers); assert(Result.getNode()->getNumValues() == 2); + + // Push node to the appropriate list so that future instructions can be + // chained up correctly. SDValue OutChain = Result.getValue(1); - DAG.setRoot(OutChain); + switch (FPI.getExceptionBehavior().getValue()) { + case fp::ExceptionBehavior::ebIgnore: + // The only reason why ebIgnore nodes still need to be chained is that + // they might depend on the current rounding mode, and therefore must + // not be moved across instruction that may change that mode. + LLVM_FALLTHROUGH; + case fp::ExceptionBehavior::ebMayTrap: + // These must not be moved across calls or instructions that may change + // floating-point exception masks. + PendingConstrainedFP.push_back(OutChain); + break; + case fp::ExceptionBehavior::ebStrict: + // These must not be moved across calls or instructions that may change + // floating-point exception masks or read floating-point exception flags. + // In addition, they cannot be optimized out even if unused. + PendingConstrainedFPStrict.push_back(OutChain); + break; + } + SDValue FPResult = Result.getValue(0); setValue(&FPI, FPResult); } @@ -7102,13 +7175,21 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, const Value *SwiftErrorVal = nullptr; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - // We can't tail call inside a function with a swifterror argument. Lowering - // does not support this yet. It would have to move into the swifterror - // register before the call. - auto *Caller = CS.getInstruction()->getParent()->getParent(); - if (TLI.supportSwiftError() && - Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) - isTailCall = false; + if (isTailCall) { + // Avoid emitting tail calls in functions with the disable-tail-calls + // attribute. + auto *Caller = CS.getInstruction()->getParent()->getParent(); + if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() == + "true") + isTailCall = false; + + // We can't tail call inside a function with a swifterror argument. Lowering + // does not support this yet. It would have to move into the swifterror + // register before the call. + if (TLI.supportSwiftError() && + Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + isTailCall = false; + } for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { @@ -7142,6 +7223,18 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, isTailCall = false; } + // If call site has a cfguardtarget operand bundle, create and add an + // additional ArgListEntry. + if (auto Bundle = CS.getOperandBundle(LLVMContext::OB_cfguardtarget)) { + TargetLowering::ArgListEntry Entry; + Value *V = Bundle->Inputs[0]; + SDValue ArgNode = getValue(V); + Entry.Node = ArgNode; + Entry.Ty = V->getType(); + Entry.IsCFGuardTarget = true; + Args.push_back(Entry); + } + // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within TLI->LowerCallTo. if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) @@ -7374,7 +7467,8 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { // In the mempcpy context we need to pass in a false value for isTailCall // because the return pointer needs to be adjusted by the size of // the copied memory. - SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Align, isVol, + SDValue Root = isVol ? getRoot() : getMemoryRoot(); + SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Align, isVol, false, /*isTailCall=*/false, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); @@ -7683,8 +7777,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. - assert(!I.hasOperandBundlesOtherThan( - {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && + // CFGuardTarget bundles are lowered in LowerCallTo. + assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt, + LLVMContext::OB_funclet, + LLVMContext::OB_cfguardtarget}) && "Cannot lower calls with arbitrary operand bundles!"); SDValue Callee = getValue(I.getCalledValue()); @@ -8182,10 +8278,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { switch (OpInfo.Type) { case InlineAsm::isOutput: - if (OpInfo.ConstraintType == TargetLowering::C_Memory || - ((OpInfo.ConstraintType == TargetLowering::C_Immediate || - OpInfo.ConstraintType == TargetLowering::C_Other) && - OpInfo.isIndirect)) { + if (OpInfo.ConstraintType == TargetLowering::C_Memory) { unsigned ConstraintID = TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); assert(ConstraintID != InlineAsm::Constraint_Unknown && @@ -8197,12 +8290,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(), MVT::i32)); AsmNodeOperands.push_back(OpInfo.CallOperand); - break; - } else if (((OpInfo.ConstraintType == TargetLowering::C_Immediate || - OpInfo.ConstraintType == TargetLowering::C_Other) && - !OpInfo.isIndirect) || - OpInfo.ConstraintType == TargetLowering::C_Register || - OpInfo.ConstraintType == TargetLowering::C_RegisterClass) { + } else { // Otherwise, this outputs to a register (directly for C_Register / // C_RegisterClass, and a target-defined fashion for // C_Immediate/C_Other). Find a register that we can use. @@ -8285,8 +8373,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Treat indirect 'X' constraint as memory. - if ((OpInfo.ConstraintType == TargetLowering::C_Immediate || - OpInfo.ConstraintType == TargetLowering::C_Other) && + if (OpInfo.ConstraintType == TargetLowering::C_Other && OpInfo.isIndirect) OpInfo.ConstraintType = TargetLowering::C_Memory; @@ -8339,8 +8426,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || - OpInfo.ConstraintType == TargetLowering::C_Register || - OpInfo.ConstraintType == TargetLowering::C_Immediate) && + OpInfo.ConstraintType == TargetLowering::C_Register) && "Unknown constraint type!"); // TODO: Support this. @@ -8678,7 +8764,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { Callee = getValue(CI.getCalledValue()); NullPtr = DAG.getIntPtrConstant(0, DL, true); - // The stackmap intrinsic only records the live variables (the arguemnts + // The stackmap intrinsic only records the live variables (the arguments // passed to it) and emits NOPS (if requested). Unlike the patchpoint // intrinsic, this won't be lowered to a function call. This means we don't // have to worry about calling conventions and target specific lowering code. @@ -9027,6 +9113,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.IsReturned = false; Entry.IsSwiftSelf = false; Entry.IsSwiftError = false; + Entry.IsCFGuardTarget = false; Entry.Alignment = Align; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); CLI.NumFixedArgs += 1; @@ -9139,6 +9226,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setSwiftSelf(); if (Args[i].IsSwiftError) Flags.setSwiftError(); + if (Args[i].IsCFGuardTarget) + Flags.setCFGuardTarget(); if (Args[i].IsByVal) Flags.setByVal(); if (Args[i].IsInAlloca) { @@ -9214,9 +9303,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 + // For scalable vectors the scalable part is currently handled + // by individual targets, so we just use the known minimum size here. ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT, - i < CLI.NumFixedArgs, - i, j*Parts[j].getValueType().getStoreSize()); + i < CLI.NumFixedArgs, i, + j*Parts[j].getValueType().getStoreSize().getKnownMinSize()); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) { @@ -9487,7 +9578,7 @@ findArgumentCopyElisionCandidates(const DataLayout &DL, /// Try to elide argument copies from memory into a local alloca. Succeeds if /// ArgVal is a load from a suitable fixed stack object. static void tryToElideArgumentCopy( - FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains, + FunctionLoweringInfo &FuncInfo, SmallVectorImpl<SDValue> &Chains, DenseMap<int, int> &ArgCopyElisionFrameIndexMap, SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs, ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg, @@ -9507,9 +9598,9 @@ static void tryToElideArgumentCopy( assert(ArgCopyIter != ArgCopyElisionCandidates.end()); const AllocaInst *AI = ArgCopyIter->second.first; int FixedIndex = FINode->getIndex(); - int &AllocaIndex = FuncInfo->StaticAllocaMap[AI]; + int &AllocaIndex = FuncInfo.StaticAllocaMap[AI]; int OldIndex = AllocaIndex; - MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo(); + MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) { LLVM_DEBUG( dbgs() << " argument copy elision failed due to bad fixed stack " @@ -9518,7 +9609,7 @@ static void tryToElideArgumentCopy( } unsigned RequiredAlignment = AI->getAlignment(); if (!RequiredAlignment) { - RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment( + RequiredAlignment = FuncInfo.MF->getDataLayout().getABITypeAlignment( AI->getAllocatedType()); } if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) { @@ -9584,7 +9675,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // flag to ask the target to give us the memory location of that argument if // available. ArgCopyElisionMapTy ArgCopyElisionCandidates; - findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates); + findArgumentCopyElisionCandidates(DL, FuncInfo.get(), + ArgCopyElisionCandidates); // Set up the incoming argument description vector. for (const Argument &Arg : F.args()) { @@ -9685,8 +9777,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { unsigned NumRegs = TLI->getNumRegistersForCallingConv( *CurDAG->getContext(), F.getCallingConv(), VT); for (unsigned i = 0; i != NumRegs; ++i) { + // For scalable vectors, use the minimum size; individual targets + // are responsible for handling scalable vector arguments and + // return values. ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, - ArgNo, PartBase+i*RegisterVT.getStoreSize()); + ArgNo, PartBase+i*RegisterVT.getStoreSize().getKnownMinSize()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 @@ -9699,7 +9794,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { } if (NeedsRegBlock && Value == NumValues - 1) Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast(); - PartBase += VT.getStoreSize(); + PartBase += VT.getStoreSize().getKnownMinSize(); } } @@ -9769,7 +9864,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Elide the copying store if the target loaded this argument from a // suitable fixed stack object. if (Ins[i].Flags.isCopyElisionCandidate()) { - tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap, + tryToElideArgumentCopy(*FuncInfo, Chains, ArgCopyElisionFrameIndexMap, ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg, InVals[i], ArgHasUses); } @@ -9795,7 +9890,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { unsigned NumParts = TLI->getNumRegistersForCallingConv( *CurDAG->getContext(), F.getCallingConv(), VT); - // Even an apparant 'unused' swifterror argument needs to be returned. So + // Even an apparent 'unused' swifterror argument needs to be returned. So // we do generate a copy for it that can be used on return from the // function. if (ArgHasUses || isSwiftErrorArg) { @@ -10508,7 +10603,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { return; } - SL->findJumpTables(Clusters, &SI, DefaultMBB); + SL->findJumpTables(Clusters, &SI, DefaultMBB, DAG.getPSI(), DAG.getBFI()); SL->findBitTestClusters(Clusters, &SI); LLVM_DEBUG({ @@ -10557,3 +10652,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB); } } + +void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) { + SDValue N = getValue(I.getOperand(0)); + setValue(&I, N); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index bfcf30b430b6..18e0edf7fc04 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -143,6 +143,20 @@ private: /// tokenfactor for them just before terminator instructions. SmallVector<SDValue, 8> PendingExports; + /// Similar to loads, nodes corresponding to constrained FP intrinsics are + /// bunched up and emitted when necessary. These can be moved across each + /// other and any (normal) memory operation (load or store), but not across + /// calls or instructions having unspecified side effects. As a special + /// case, constrained FP intrinsics using fpexcept.strict may not be deleted + /// even if otherwise unused, so they need to be chained before any + /// terminator instruction (like PendingExports). We track the latter + /// set of nodes in a separate list. + SmallVector<SDValue, 8> PendingConstrainedFP; + SmallVector<SDValue, 8> PendingConstrainedFPStrict; + + /// Update root to include all chains from the Pending list. + SDValue updateRoot(SmallVectorImpl<SDValue> &Pending); + /// A unique monotonically increasing number used to order the SDNodes we /// create. unsigned SDNodeOrder; @@ -447,12 +461,18 @@ public: /// Return the current virtual root of the Selection DAG, flushing any /// PendingLoad items. This must be done before emitting a store or any other - /// node that may need to be ordered after any prior load instructions. + /// memory node that may need to be ordered after any prior load instructions. + SDValue getMemoryRoot(); + + /// Similar to getMemoryRoot, but also flushes PendingConstrainedFP(Strict) + /// items. This must be done before emitting any call other any other node + /// that may need to be ordered after FP instructions due to other side + /// effects. SDValue getRoot(); /// Similar to getRoot, but instead of flushing all the PendingLoad items, - /// flush all the PendingExports items. It is necessary to do this before - /// emitting a terminator instruction. + /// flush all the PendingExports (and PendingConstrainedFPStrict) items. + /// It is necessary to do this before emitting a terminator instruction. SDValue getControlRoot(); SDLoc getCurSDLoc() const { @@ -742,6 +762,7 @@ private: void visitAtomicStore(const StoreInst &I); void visitLoadFromSwiftError(const LoadInst &I); void visitStoreToSwiftError(const StoreInst &I); + void visitFreeze(const FreezeInst &I); void visitInlineAsm(ImmutableCallSite CS); void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index bc10f7621239..6fd71393bf38 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -186,7 +186,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FMINNUM_IEEE: return "fminnum_ieee"; case ISD::FMAXNUM_IEEE: return "fmaxnum_ieee"; case ISD::FMINIMUM: return "fminimum"; + case ISD::STRICT_FMINIMUM: return "strict_fminimum"; case ISD::FMAXIMUM: return "fmaximum"; + case ISD::STRICT_FMAXIMUM: return "strict_fmaximum"; case ISD::FNEG: return "fneg"; case ISD::FSQRT: return "fsqrt"; case ISD::STRICT_FSQRT: return "strict_fsqrt"; @@ -270,6 +272,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STRICT_FPOWI: return "strict_fpowi"; case ISD::SETCC: return "setcc"; case ISD::SETCCCARRY: return "setcccarry"; + case ISD::STRICT_FSETCC: return "strict_fsetcc"; + case ISD::STRICT_FSETCCS: return "strict_fsetccs"; case ISD::SELECT: return "select"; case ISD::VSELECT: return "vselect"; case ISD::SELECT_CC: return "select_cc"; @@ -308,6 +312,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::UMULFIX: return "umulfix"; case ISD::UMULFIXSAT: return "umulfixsat"; + case ISD::SDIVFIX: return "sdivfix"; + case ISD::UDIVFIX: return "udivfix"; + // Conversion operators. case ISD::SIGN_EXTEND: return "sign_extend"; case ISD::ZERO_EXTEND: return "zero_extend"; @@ -324,7 +331,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STRICT_FP_EXTEND: return "strict_fp_extend"; case ISD::SINT_TO_FP: return "sint_to_fp"; + case ISD::STRICT_SINT_TO_FP: return "strict_sint_to_fp"; case ISD::UINT_TO_FP: return "uint_to_fp"; + case ISD::STRICT_UINT_TO_FP: return "strict_uint_to_fp"; case ISD::FP_TO_SINT: return "fp_to_sint"; case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; @@ -541,6 +550,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getFlags().hasVectorReduction()) OS << " vector-reduction"; + if (getFlags().hasNoFPExcept()) + OS << " nofpexcept"; + if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { if (!MN->memoperands_empty()) { OS << "<"; @@ -685,6 +697,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (doExt) OS << " from " << MLd->getMemoryVT().getEVTString(); + const char *AM = getIndexedModeName(MLd->getAddressingMode()); + if (*AM) + OS << ", " << AM; + if (MLd->isExpandingLoad()) OS << ", expanding"; @@ -696,6 +712,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (MSt->isTruncatingStore()) OS << ", trunc to " << MSt->getMemoryVT().getEVTString(); + const char *AM = getIndexedModeName(MSt->getAddressingMode()); + if (*AM) + OS << ", " << AM; + if (MSt->isCompressingStore()) OS << ", compressing"; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 1f07a241a824..6c57c72d47a7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -27,8 +27,10 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/FastISel.h" @@ -71,10 +73,12 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" @@ -147,17 +151,17 @@ static cl::opt<bool> ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden, cl::desc("Pop up a window to show dags before legalize types")); static cl::opt<bool> -ViewLegalizeDAGs("view-legalize-dags", cl::Hidden, - cl::desc("Pop up a window to show dags before legalize")); + ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the post " + "legalize types dag combine pass")); +static cl::opt<bool> + ViewLegalizeDAGs("view-legalize-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize")); static cl::opt<bool> ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden, cl::desc("Pop up a window to show dags before the second " "dag combine pass")); static cl::opt<bool> -ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden, - cl::desc("Pop up a window to show dags before the post legalize types" - " dag combine pass")); -static cl::opt<bool> ViewISelDAGs("view-isel-dags", cl::Hidden, cl::desc("Pop up a window to show isel dags as they are selected")); static cl::opt<bool> @@ -167,12 +171,10 @@ static cl::opt<bool> ViewSUnitDAGs("view-sunit-dags", cl::Hidden, cl::desc("Pop up a window to show SUnit dags after they are processed")); #else -static const bool ViewDAGCombine1 = false, - ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false, - ViewDAGCombine2 = false, - ViewDAGCombineLT = false, - ViewISelDAGs = false, ViewSchedDAGs = false, - ViewSUnitDAGs = false; +static const bool ViewDAGCombine1 = false, ViewLegalizeTypesDAGs = false, + ViewDAGCombineLT = false, ViewLegalizeDAGs = false, + ViewDAGCombine2 = false, ViewISelDAGs = false, + ViewSchedDAGs = false, ViewSUnitDAGs = false; #endif //===---------------------------------------------------------------------===// @@ -305,28 +307,22 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// -SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, - CodeGenOpt::Level OL) : - MachineFunctionPass(ID), TM(tm), - FuncInfo(new FunctionLoweringInfo()), - SwiftError(new SwiftErrorValueTracking()), - CurDAG(new SelectionDAG(tm, OL)), - SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, *SwiftError, OL)), - AA(), GFI(), - OptLevel(OL), - DAGSize(0) { - initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); - initializeBranchProbabilityInfoWrapperPassPass( - *PassRegistry::getPassRegistry()); - initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); - initializeTargetLibraryInfoWrapperPassPass( - *PassRegistry::getPassRegistry()); - } +SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) + : MachineFunctionPass(ID), TM(tm), FuncInfo(new FunctionLoweringInfo()), + SwiftError(new SwiftErrorValueTracking()), + CurDAG(new SelectionDAG(tm, OL)), + SDB(std::make_unique<SelectionDAGBuilder>(*CurDAG, *FuncInfo, *SwiftError, + OL)), + AA(), GFI(), OptLevel(OL), DAGSize(0) { + initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); + initializeBranchProbabilityInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); + initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); + initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} SelectionDAGISel::~SelectionDAGISel() { - delete SDB; delete CurDAG; - delete FuncInfo; delete SwiftError; } @@ -340,6 +336,8 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetTransformInfoWrapperPass>(); if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired<BranchProbabilityInfoWrapperPass>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); + LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -442,13 +440,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; + auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + auto *BFI = (PSI && PSI->hasProfileSummary()) ? + &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() : + nullptr; LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI); CurDAG->init(*MF, *ORE, this, LibInfo, - getAnalysisIfAvailable<LegacyDivergenceAnalysis>()); + getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI); FuncInfo->set(Fn, *MF, CurDAG); SwiftError->setFunction(*MF); @@ -735,23 +737,20 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, } void SelectionDAGISel::ComputeLiveOutVRegInfo() { - SmallPtrSet<SDNode*, 16> VisitedNodes; + SmallPtrSet<SDNode *, 16> Added; SmallVector<SDNode*, 128> Worklist; Worklist.push_back(CurDAG->getRoot().getNode()); + Added.insert(CurDAG->getRoot().getNode()); KnownBits Known; do { SDNode *N = Worklist.pop_back_val(); - // If we've already seen this node, ignore it. - if (!VisitedNodes.insert(N).second) - continue; - // Otherwise, add all chain operands to the worklist. for (const SDValue &Op : N->op_values()) - if (Op.getValueType() == MVT::Other) + if (Op.getValueType() == MVT::Other && Added.insert(Op.getNode()).second) Worklist.push_back(Op.getNode()); // If this is a CopyToReg with a vreg dest, process it. @@ -793,8 +792,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { FuncInfo->MBB->getBasicBlock()->getName()); #endif #ifdef NDEBUG - if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || - ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || + if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewDAGCombineLT || + ViewLegalizeDAGs || ViewDAGCombine2 || ViewISelDAGs || ViewSchedDAGs || ViewSUnitDAGs) #endif { @@ -1159,10 +1158,30 @@ void SelectionDAGISel::DoInstructionSelection() { // we convert them to normal FP opcodes instead at this point. This // will allow them to be handled by existing target-specific instruction // selectors. - if (Node->isStrictFPOpcode() && - (TLI->getOperationAction(Node->getOpcode(), Node->getValueType(0)) - != TargetLowering::Legal)) - Node = CurDAG->mutateStrictFPToFP(Node); + if (!TLI->isStrictFPEnabled() && Node->isStrictFPOpcode()) { + // For some opcodes, we need to call TLI->getOperationAction using + // the first operand type instead of the result type. Note that this + // must match what SelectionDAGLegalize::LegalizeOp is doing. + EVT ActionVT; + switch (Node->getOpcode()) { + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: + ActionVT = Node->getOperand(1).getValueType(); + break; + default: + ActionVT = Node->getValueType(0); + break; + } + if (TLI->getOperationAction(Node->getOpcode(), ActionVT) + == TargetLowering::Expand) + Node = CurDAG->mutateStrictFPToFP(Node); + } LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: "; Node->dump(CurDAG)); @@ -1280,20 +1299,20 @@ bool SelectionDAGISel::PrepareEHLandingPad() { /// side-effect free and is either dead or folded into a generated instruction. /// Return false if it needs to be emitted. static bool isFoldedOrDeadInstruction(const Instruction *I, - FunctionLoweringInfo *FuncInfo) { + const FunctionLoweringInfo &FuncInfo) { return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. !I->isTerminator() && // Terminators aren't folded. - !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded. - !I->isEHPad() && // EH pad instructions aren't folded. - !FuncInfo->isExportedInst(I); // Exported instrs must be computed. + !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded. + !I->isEHPad() && // EH pad instructions aren't folded. + !FuncInfo.isExportedInst(I); // Exported instrs must be computed. } /// Collect llvm.dbg.declare information. This is done after argument lowering /// in case the declarations refer to arguments. -static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) { - MachineFunction *MF = FuncInfo->MF; +static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) { + MachineFunction *MF = FuncInfo.MF; const DataLayout &DL = MF->getDataLayout(); - for (const BasicBlock &BB : *FuncInfo->Fn) { + for (const BasicBlock &BB : *FuncInfo.Fn) { for (const Instruction &I : BB) { const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I); if (!DI) @@ -1315,11 +1334,11 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) { // intrinsic and handle this during isel like dbg.value. int FI = std::numeric_limits<int>::max(); if (const auto *AI = dyn_cast<AllocaInst>(Address)) { - auto SI = FuncInfo->StaticAllocaMap.find(AI); - if (SI != FuncInfo->StaticAllocaMap.end()) + auto SI = FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) FI = SI->second; } else if (const auto *Arg = dyn_cast<Argument>(Address)) - FI = FuncInfo->getArgumentFrameIndex(Arg); + FI = FuncInfo.getArgumentFrameIndex(Arg); if (FI == std::numeric_limits<int>::max()) continue; @@ -1353,7 +1372,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()]; FuncInfo->InsertPt = FuncInfo->MBB->begin(); - CurDAG->setFunctionLoweringInfo(FuncInfo); + CurDAG->setFunctionLoweringInfo(FuncInfo.get()); if (!FastIS) { LowerArguments(Fn); @@ -1393,7 +1412,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (FastIS && Inserted) FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt)); - processDbgDeclares(FuncInfo); + processDbgDeclares(*FuncInfo); // Iterate over all basic blocks in the function. StackProtector &SP = getAnalysis<StackProtector>(); @@ -1453,7 +1472,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { const Instruction *Inst = &*std::prev(BI); // If we no longer require this instruction, skip it. - if (isFoldedOrDeadInstruction(Inst, FuncInfo) || + if (isFoldedOrDeadInstruction(Inst, *FuncInfo) || ElidedArgCopyInstrs.count(Inst)) { --NumFastIselRemaining; continue; @@ -1473,7 +1492,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { const Instruction *BeforeInst = Inst; while (BeforeInst != &*Begin) { BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst)); - if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) + if (!isFoldedOrDeadInstruction(BeforeInst, *FuncInfo)) break; } if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) && @@ -1589,7 +1608,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // But if FastISel was run, we already selected some of the block. // If we emitted a tail-call, we need to delete any previously emitted // instruction that follows it. - if (HadTailCall && FuncInfo->InsertPt != FuncInfo->MBB->end()) + if (FastIS && HadTailCall && FuncInfo->InsertPt != FuncInfo->MBB->end()) FastIS->removeDeadCode(FuncInfo->InsertPt, FuncInfo->MBB->end()); } @@ -2230,10 +2249,13 @@ void SelectionDAGISel::Select_INLINEASM(SDNode *N, bool Branch) { void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { SDLoc dl(Op); - MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); - const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); + MDNodeSDNode *MD = cast<MDNodeSDNode>(Op->getOperand(1)); + const MDString *RegStr = cast<MDString>(MD->getMD()->getOperand(0)); + + EVT VT = Op->getValueType(0); + LLT Ty = VT.isSimple() ? getLLTForMVT(VT.getSimpleVT()) : LLT(); Register Reg = - TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0), + TLI->getRegisterByName(RegStr->getString().data(), Ty, CurDAG->getMachineFunction()); SDValue New = CurDAG->getCopyFromReg( Op->getOperand(0), dl, Reg, Op->getValueType(0)); @@ -2244,10 +2266,13 @@ void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) { SDLoc dl(Op); - MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); - const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); - Register Reg = TLI->getRegisterByName(RegStr->getString().data(), - Op->getOperand(2).getValueType(), + MDNodeSDNode *MD = cast<MDNodeSDNode>(Op->getOperand(1)); + const MDString *RegStr = cast<MDString>(MD->getMD()->getOperand(0)); + + EVT VT = Op->getOperand(2).getValueType(); + LLT Ty = VT.isSimple() ? getLLTForMVT(VT.getSimpleVT()) : LLT(); + + Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Ty, CurDAG->getMachineFunction()); SDValue New = CurDAG->getCopyToReg( Op->getOperand(0), dl, Reg, Op->getOperand(2)); @@ -3176,13 +3201,19 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case OPC_CheckFoldableChainNode: { assert(NodeStack.size() != 1 && "No parent node"); // Verify that all intermediate nodes between the root and this one have - // a single use. + // a single use (ignoring chains, which are handled in UpdateChains). bool HasMultipleUses = false; - for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i) - if (!NodeStack[i].getNode()->hasOneUse()) { - HasMultipleUses = true; - break; - } + for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i) { + unsigned NNonChainUses = 0; + SDNode *NS = NodeStack[i].getNode(); + for (auto UI = NS->use_begin(), UE = NS->use_end(); UI != UE; ++UI) + if (UI.getUse().getValueType() != MVT::Other) + if (++NNonChainUses > 1) { + HasMultipleUses = true; + break; + } + if (HasMultipleUses) break; + } if (HasMultipleUses) break; // Check to see that the target thinks this is profitable to fold and that @@ -3433,6 +3464,17 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr) Ops.push_back(InputGlue); + // Check whether any matched node could raise an FP exception. Since all + // such nodes must have a chain, it suffices to check ChainNodesMatched. + // We need to perform this check before potentially modifying one of the + // nodes via MorphNode. + bool MayRaiseFPException = false; + for (auto *N : ChainNodesMatched) + if (mayRaiseFPException(N) && !N->getFlags().hasNoFPExcept()) { + MayRaiseFPException = true; + break; + } + // Create the node. MachineSDNode *Res = nullptr; bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo || @@ -3464,6 +3506,14 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, Ops, EmitNodeInfo)); } + // Set the NoFPExcept flag when no original matched node could + // raise an FP exception, but the new node potentially might. + if (!MayRaiseFPException && mayRaiseFPException(Res)) { + SDNodeFlags Flags = Res->getFlags(); + Flags.setNoFPExcept(true); + Res->setFlags(Flags); + } + // If the node had chain/glue results, update our notion of the current // chain and glue. if (EmitNodeInfo & OPFL_GlueOutput) { @@ -3619,6 +3669,21 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, } } +/// Return whether the node may raise an FP exception. +bool SelectionDAGISel::mayRaiseFPException(SDNode *N) const { + // For machine opcodes, consult the MCID flag. + if (N->isMachineOpcode()) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + return MCID.mayRaiseFPException(); + } + + // For ISD opcodes, only StrictFP opcodes may raise an FP + // exception. + if (N->isTargetOpcode()) + return N->isTargetStrictFPOpcode(); + return N->isStrictFPOpcode(); +} + bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const { assert(N->getOpcode() == ISD::OR && "Unexpected opcode"); auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index fad98b6f50dc..c628f379e415 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -384,7 +384,8 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, // can consider allowing spills of smaller values to larger slots // (i.e. change the '==' in the assert below to a '>='). MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); - assert((MFI.getObjectSize(Index) * 8) == Incoming.getValueSizeInBits() && + assert((MFI.getObjectSize(Index) * 8) == + (int64_t)Incoming.getValueSizeInBits() && "Bad spill: stack slot does not match!"); // Note: Using the alignment of the spill slot (rather than the abi or diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 9ab1324533f1..24ab65171a17 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -52,6 +52,10 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const { const Function &F = DAG.getMachineFunction().getFunction(); + // First, check if tail calls have been disabled in this function. + if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true") + return false; + // Conservatively require the attributes of the call to match those of // the return. Ignore NoAlias and NonNull because they don't affect the // call sequence. @@ -122,7 +126,11 @@ std::pair<SDValue, SDValue> TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef<SDValue> Ops, MakeLibCallOptions CallOptions, - const SDLoc &dl) const { + const SDLoc &dl, + SDValue InChain) const { + if (!InChain) + InChain = DAG.getEntryNode(); + TargetLowering::ArgListTy Args; Args.reserve(Ops.size()); @@ -158,7 +166,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, } CLI.setDebugLoc(dl) - .setChain(DAG.getEntryNode()) + .setChain(InChain) .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setNoReturn(CallOptions.DoesNotReturn) .setDiscardResult(!CallOptions.IsReturnValueUsed) @@ -277,6 +285,22 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, ISD::CondCode &CCCode, const SDLoc &dl, const SDValue OldLHS, const SDValue OldRHS) const { + SDValue Chain; + return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS, + OldRHS, Chain); +} + +void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, + SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, + const SDLoc &dl, const SDValue OldLHS, + const SDValue OldRHS, + SDValue &Chain, + bool IsSignaling) const { + // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc + // not supporting it. We can update this code when libgcc provides such + // functions. + assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128) && "Unsupported setcc type!"); @@ -320,25 +344,18 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, (VT == MVT::f64) ? RTLIB::OGT_F64 : (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128; break; + case ISD::SETO: + ShouldInvertCC = true; + LLVM_FALLTHROUGH; case ISD::SETUO: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : (VT == MVT::f64) ? RTLIB::UO_F64 : (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128; break; - case ISD::SETO: - LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : - (VT == MVT::f64) ? RTLIB::O_F64 : - (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128; - break; case ISD::SETONE: - // SETONE = SETOLT | SETOGT - LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : - (VT == MVT::f64) ? RTLIB::OLT_F64 : - (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128; - LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : - (VT == MVT::f64) ? RTLIB::OGT_F64 : - (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128; - break; + // SETONE = O && UNE + ShouldInvertCC = true; + LLVM_FALLTHROUGH; case ISD::SETUEQ: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : (VT == MVT::f64) ? RTLIB::UO_F64 : @@ -382,24 +399,33 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, EVT OpsVT[2] = { OldLHS.getValueType(), OldRHS.getValueType() }; CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true); - NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first; + auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); + NewLHS = Call.first; NewRHS = DAG.getConstant(0, dl, RetVT); CCCode = getCmpLibcallCC(LC1); - if (ShouldInvertCC) - CCCode = getSetCCInverse(CCCode, /*isInteger=*/true); - - if (LC2 != RTLIB::UNKNOWN_LIBCALL) { - SDValue Tmp = DAG.getNode( - ISD::SETCC, dl, - getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), - NewLHS, NewRHS, DAG.getCondCode(CCCode)); - NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first; - NewLHS = DAG.getNode( - ISD::SETCC, dl, - getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), - NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2))); - NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS); + if (ShouldInvertCC) { + assert(RetVT.isInteger()); + CCCode = getSetCCInverse(CCCode, RetVT); + } + + if (LC2 == RTLIB::UNKNOWN_LIBCALL) { + // Update Chain. + Chain = Call.second; + } else { + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT); + SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode); + auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain); + CCCode = getCmpLibcallCC(LC2); + if (ShouldInvertCC) + CCCode = getSetCCInverse(CCCode, RetVT); + NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode); + if (Chain) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second, + Call2.second); + NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl, + Tmp.getValueType(), Tmp, NewLHS); NewRHS = SDValue(); } } @@ -693,6 +719,27 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( return Op.getOperand(1); break; } + case ISD::SETCC: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + // If (1) we only need the sign-bit, (2) the setcc operands are the same + // width as the setcc result, and (3) the result of a setcc conforms to 0 or + // -1, we may be able to bypass the setcc. + if (DemandedBits.isSignMask() && + Op0.getScalarValueSizeInBits() == DemandedBits.getBitWidth() && + getBooleanContents(Op0.getValueType()) == + BooleanContent::ZeroOrNegativeOneBooleanContent) { + // If we're testing X < 0, then this compare isn't needed - just use X! + // FIXME: We're limiting to integer types here, but this should also work + // if we don't care about FP signed-zero. The use of SETLT with FP means + // that we don't care about NaNs. + if (CC == ISD::SETLT && Op1.getValueType().isInteger() && + (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode()))) + return Op0; + } + break; + } case ISD::SIGN_EXTEND_INREG: { // If none of the extended bits are demanded, eliminate the sextinreg. EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); @@ -1251,7 +1298,7 @@ bool TargetLowering::SimplifyDemandedBits( // -1, we may be able to bypass the setcc. if (DemandedBits.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth && - getBooleanContents(VT) == + getBooleanContents(Op0.getValueType()) == BooleanContent::ZeroOrNegativeOneBooleanContent) { // If we're testing X < 0, then this compare isn't needed - just use X! // FIXME: We're limiting to integer types here, but this should also work @@ -1538,6 +1585,16 @@ bool TargetLowering::SimplifyDemandedBits( Known.Zero = Known2.Zero.reverseBits(); break; } + case ISD::BSWAP: { + SDValue Src = Op.getOperand(0); + APInt DemandedSrcBits = DemandedBits.byteSwap(); + if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO, + Depth + 1)) + return true; + Known.One = Known2.One.byteSwap(); + Known.Zero = Known2.Zero.byteSwap(); + break; + } case ISD::SIGN_EXTEND_INREG: { SDValue Op0 = Op.getOperand(0); EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); @@ -1753,15 +1810,11 @@ bool TargetLowering::SimplifyDemandedBits( // undesirable. break; - auto *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1)); - if (!ShAmt || ShAmt->getAPIntValue().uge(BitWidth)) + SDValue ShAmt = Src.getOperand(1); + auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt); + if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth)) break; - - SDValue Shift = Src.getOperand(1); - uint64_t ShVal = ShAmt->getZExtValue(); - - if (TLO.LegalTypes()) - Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL)); + uint64_t ShVal = ShAmtC->getZExtValue(); APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth); @@ -1771,10 +1824,12 @@ bool TargetLowering::SimplifyDemandedBits( if (!(HighBits & DemandedBits)) { // None of the shifted in bits are needed. Add a truncate of the // shift input, then shift it. + if (TLO.LegalTypes()) + ShAmt = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL)); SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0)); return TLO.CombineTo( - Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift)); + Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, ShAmt)); } break; } @@ -1818,6 +1873,17 @@ bool TargetLowering::SimplifyDemandedBits( Depth + 1)) return true; + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedSrcBits.isAllOnesValue() || + !DemandedSrcElts.isAllOnesValue()) { + if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits( + Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) { + SDValue NewOp = + TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx); + return TLO.CombineTo(Op, NewOp); + } + } + Known = Known2; if (BitWidth > EltBitWidth) Known = Known.zext(BitWidth, false /* => any extend */); @@ -2808,7 +2874,8 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, // Note that where Y is variable and is known to have at most one bit set // (for example, if it is Z & 1) we cannot do this; the expressions are not // equivalent when Y == 0. - Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); + assert(OpVT.isInteger()); + Cond = ISD::getSetCCInverse(Cond, OpVT); if (DCI.isBeforeLegalizeOps() || isCondCodeLegal(Cond, N0.getSimpleValueType())) return DAG.getSetCC(DL, VT, N0, Zero, Cond); @@ -2897,7 +2964,8 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck( // What if we invert constants? (and the target predicate) I1.negate(); I01.negate(); - NewCond = getSetCCInverse(NewCond, /*isInteger=*/true); + assert(XVT.isInteger()); + NewCond = getSetCCInverse(NewCond, XVT); if (!checkConstants()) return SDValue(); // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256 @@ -3052,6 +3120,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAGCombinerInfo &DCI, const SDLoc &dl) const { SelectionDAG &DAG = DCI.DAG; + const DataLayout &Layout = DAG.getDataLayout(); EVT OpVT = N0.getValueType(); // Constant fold or commute setcc. @@ -3132,7 +3201,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0) SDValue Zero = DAG.getConstant(0, dl, CTVT); SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); - ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, true); + assert(CTVT.isInteger()); + ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT); SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond); @@ -3223,7 +3293,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode InvCond = ISD::getSetCCInverse( cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(), - TopSetCC.getOperand(0).getValueType().isInteger()); + TopSetCC.getOperand(0).getValueType()); return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0), TopSetCC.getOperand(1), InvCond); @@ -3256,7 +3326,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, APInt newMask = APInt::getLowBitsSet(maskWidth, width); for (unsigned offset=0; offset<origWidth/width; offset++) { if (Mask.isSubsetOf(newMask)) { - if (DAG.getDataLayout().isLittleEndian()) + if (Layout.isLittleEndian()) bestOffset = (uint64_t)offset * (width/8); else bestOffset = (origWidth/width - offset - 1) * (width/8); @@ -3272,11 +3342,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth); if (newVT.isRound() && shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) { - EVT PtrType = Lod->getOperand(1).getValueType(); SDValue Ptr = Lod->getBasePtr(); if (bestOffset != 0) - Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), - DAG.getConstant(bestOffset, dl, PtrType)); + Ptr = DAG.getMemBasePlusOffset(Ptr, bestOffset, dl); unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); SDValue NewLoad = DAG.getLoad( newVT, dl, Lod->getChain(), Ptr, @@ -3332,8 +3400,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (DCI.isBeforeLegalizeOps() || (isOperationLegal(ISD::SETCC, newVT) && isCondCodeLegal(Cond, newVT.getSimpleVT()))) { - EVT NewSetCCVT = - getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT); + EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT); SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT); SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0), @@ -3379,14 +3446,16 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC if (N0.getOpcode() == ISD::SETCC && - isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) { + isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) && + (N0.getValueType() == MVT::i1 || + getBooleanContents(N0.getOperand(0).getValueType()) == + ZeroOrOneBooleanContent)) { bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne()); if (TrueWhenTrue) return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); // Invert the condition. ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - CC = ISD::getSetCCInverse(CC, - N0.getOperand(0).getValueType().isInteger()); + CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType()); if (DCI.isBeforeLegalizeOps() || isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType())) return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); @@ -3420,10 +3489,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, Val, N1, Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); } - } else if (N1C->isOne() && - (VT == MVT::i1 || - getBooleanContents(N0->getValueType(0)) == - ZeroOrOneBooleanContent)) { + } else if (N1C->isOne()) { SDValue Op0 = N0; if (Op0.getOpcode() == ISD::TRUNCATE) Op0 = Op0.getOperand(0); @@ -3431,10 +3497,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if ((Op0.getOpcode() == ISD::XOR) && Op0.getOperand(0).getOpcode() == ISD::SETCC && Op0.getOperand(1).getOpcode() == ISD::SETCC) { - // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc) - Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ; - return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1), - Cond); + SDValue XorLHS = Op0.getOperand(0); + SDValue XorRHS = Op0.getOperand(1); + // Ensure that the input setccs return an i1 type or 0/1 value. + if (Op0.getValueType() == MVT::i1 || + (getBooleanContents(XorLHS.getOperand(0).getValueType()) == + ZeroOrOneBooleanContent && + getBooleanContents(XorRHS.getOperand(0).getValueType()) == + ZeroOrOneBooleanContent)) { + // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc) + Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ; + return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond); + } } if (Op0.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op0.getOperand(1)) && @@ -3611,14 +3685,14 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) && N0.getOpcode() == ISD::AND) { - auto &DL = DAG.getDataLayout(); if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = getShiftAmountTy(ShValTy, DL, !DCI.isBeforeLegalize()); + EVT ShiftTy = + getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. unsigned ShCt = AndRHS->getAPIntValue().logBase2(); if (AndRHS->getAPIntValue().isPowerOf2() && - ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { + !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(ISD::SRL, dl, ShValTy, N0, DAG.getConstant(ShCt, dl, ShiftTy))); @@ -3628,7 +3702,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Perform the xform if C1 is a single bit. unsigned ShCt = C1.logBase2(); if (C1.isPowerOf2() && - ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { + !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(ISD::SRL, dl, ShValTy, N0, DAG.getConstant(ShCt, dl, ShiftTy))); @@ -3639,6 +3713,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (C1.getMinSignedBits() <= 64 && !isLegalICmpImmediate(C1.getSExtValue())) { + EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize()); // (X & -256) == 256 -> (X >> 8) == 1 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && N0.getOpcode() == ISD::AND && N0.hasOneUse()) { @@ -3646,15 +3721,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, const APInt &AndRHSC = AndRHS->getAPIntValue(); if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); - auto &DL = DAG.getDataLayout(); - EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, - !DCI.isBeforeLegalize()); - EVT CmpTy = N0.getValueType(); - SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), - DAG.getConstant(ShiftBits, dl, - ShiftTy)); - SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy); - return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); + if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) { + SDValue Shift = + DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0), + DAG.getConstant(ShiftBits, dl, ShiftTy)); + SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy); + return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); + } } } } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE || @@ -3676,14 +3749,11 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } NewC.lshrInPlace(ShiftBits); if (ShiftBits && NewC.getMinSignedBits() <= 64 && - isLegalICmpImmediate(NewC.getSExtValue())) { - auto &DL = DAG.getDataLayout(); - EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, - !DCI.isBeforeLegalize()); - EVT CmpTy = N0.getValueType(); - SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, + isLegalICmpImmediate(NewC.getSExtValue()) && + !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) { + SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0, DAG.getConstant(ShiftBits, dl, ShiftTy)); - SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy); + SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy); return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond); } } @@ -4480,6 +4550,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[i]); + // Indirect 'other' or 'immediate' constraints are not allowed. + if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory || + CType == TargetLowering::C_Register || + CType == TargetLowering::C_RegisterClass)) + continue; + // If this is an 'other' or 'immediate' constraint, see if the operand is // valid for it. For example, on X86 we might have an 'rI' constraint. If // the operand is an integer in the range [0..31] we want to use I (saving a @@ -4905,7 +4981,7 @@ SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode, ISD::CondCode Cond, DAGCombinerInfo &DCI, const SDLoc &DL) const { - SmallVector<SDNode *, 2> Built; + SmallVector<SDNode *, 5> Built; if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond, DCI, DL, Built)) { for (SDNode *N : Built) @@ -4940,26 +5016,44 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, if (!isOperationLegalOrCustom(ISD::MUL, VT)) return SDValue(); - // TODO: Could support comparing with non-zero too. - ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); - if (!CompTarget || !CompTarget->isNullValue()) - return SDValue(); - - bool HadOneDivisor = false; - bool AllDivisorsAreOnes = true; + bool ComparingWithAllZeros = true; + bool AllComparisonsWithNonZerosAreTautological = true; + bool HadTautologicalLanes = false; + bool AllLanesAreTautological = true; bool HadEvenDivisor = false; bool AllDivisorsArePowerOfTwo = true; - SmallVector<SDValue, 16> PAmts, KAmts, QAmts; + bool HadTautologicalInvertedLanes = false; + SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts; - auto BuildUREMPattern = [&](ConstantSDNode *C) { + auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) { // Division by 0 is UB. Leave it to be constant-folded elsewhere. - if (C->isNullValue()) + if (CDiv->isNullValue()) return false; - const APInt &D = C->getAPIntValue(); - // If all divisors are ones, we will prefer to avoid the fold. - HadOneDivisor |= D.isOneValue(); - AllDivisorsAreOnes &= D.isOneValue(); + const APInt &D = CDiv->getAPIntValue(); + const APInt &Cmp = CCmp->getAPIntValue(); + + ComparingWithAllZeros &= Cmp.isNullValue(); + + // x u% C1` is *always* less than C1. So given `x u% C1 == C2`, + // if C2 is not less than C1, the comparison is always false. + // But we will only be able to produce the comparison that will give the + // opposive tautological answer. So this lane would need to be fixed up. + bool TautologicalInvertedLane = D.ule(Cmp); + HadTautologicalInvertedLanes |= TautologicalInvertedLane; + + // If all lanes are tautological (either all divisors are ones, or divisor + // is not greater than the constant we are comparing with), + // we will prefer to avoid the fold. + bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane; + HadTautologicalLanes |= TautologicalLane; + AllLanesAreTautological &= TautologicalLane; + + // If we are comparing with non-zero, we need'll need to subtract said + // comparison value from the LHS. But there is no point in doing that if + // every lane where we are comparing with non-zero is tautological.. + if (!Cmp.isNullValue()) + AllComparisonsWithNonZerosAreTautological &= TautologicalLane; // Decompose D into D0 * 2^K unsigned K = D.countTrailingZeros(); @@ -4981,19 +5075,27 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); - // Q = floor((2^W - 1) / D) - APInt Q = APInt::getAllOnesValue(W).udiv(D); + // Q = floor((2^W - 1) u/ D) + // R = ((2^W - 1) u% D) + APInt Q, R; + APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R); + + // If we are comparing with zero, then that comparison constant is okay, + // else it may need to be one less than that. + if (Cmp.ugt(R)) + Q -= 1; assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && "We are expecting that K is always less than all-ones for ShSVT"); - // If the divisor is 1 the result can be constant-folded. - if (D.isOneValue()) { + // If the lane is tautological the result can be constant-folded. + if (TautologicalLane) { // Set P and K amount to a bogus values so we can try to splat them. P = 0; K = -1; - assert(Q.isAllOnesValue() && - "Expecting all-ones comparison for one divisor"); + // And ensure that comparison constant is tautological, + // it will always compare true/false. + Q = -1; } PAmts.push_back(DAG.getConstant(P, DL, SVT)); @@ -5007,11 +5109,11 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue D = REMNode.getOperand(1); // Collect the values from each element. - if (!ISD::matchUnaryPredicate(D, BuildUREMPattern)) + if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern)) return SDValue(); - // If this is a urem by a one, avoid the fold since it can be constant-folded. - if (AllDivisorsAreOnes) + // If all lanes are tautological, the result can be constant-folded. + if (AllLanesAreTautological) return SDValue(); // If this is a urem by a powers-of-two, avoid the fold since it can be @@ -5021,7 +5123,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue PVal, KVal, QVal; if (VT.isVector()) { - if (HadOneDivisor) { + if (HadTautologicalLanes) { // Try to turn PAmts into a splat, since we don't care about the values // that are currently '0'. If we can't, just keep '0'`s. turnVectorIntoSplatVector(PAmts, isNullConstant); @@ -5040,6 +5142,14 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, QVal = QAmts[0]; } + if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) { + if (!isOperationLegalOrCustom(ISD::SUB, VT)) + return SDValue(); // FIXME: Could/should use `ISD::ADD`? + assert(CompTargetNode.getValueType() == N.getValueType() && + "Expecting that the types on LHS and RHS of comparisons match."); + N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode); + } + // (mul N, P) SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal); Created.push_back(Op0.getNode()); @@ -5058,8 +5168,41 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, } // UREM: (setule/setugt (rotr (mul N, P), K), Q) - return DAG.getSetCC(DL, SETCCVT, Op0, QVal, - ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); + SDValue NewCC = + DAG.getSetCC(DL, SETCCVT, Op0, QVal, + ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); + if (!HadTautologicalInvertedLanes) + return NewCC; + + // If any lanes previously compared always-false, the NewCC will give + // always-true result for them, so we need to fixup those lanes. + // Or the other way around for inequality predicate. + assert(VT.isVector() && "Can/should only get here for vectors."); + Created.push_back(NewCC.getNode()); + + // x u% C1` is *always* less than C1. So given `x u% C1 == C2`, + // if C2 is not less than C1, the comparison is always false. + // But we have produced the comparison that will give the + // opposive tautological answer. So these lanes would need to be fixed up. + SDValue TautologicalInvertedChannels = + DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE); + Created.push_back(TautologicalInvertedChannels.getNode()); + + if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) { + // If we have a vector select, let's replace the comparison results in the + // affected lanes with the correct tautological result. + SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true, + DL, SETCCVT, SETCCVT); + return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels, + Replacement, NewCC); + } + + // Else, we can just invert the comparison result in the appropriate lanes. + if (isOperationLegalOrCustom(ISD::XOR, SETCCVT)) + return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC, + TautologicalInvertedChannels); + + return SDValue(); // Don't know how to lower. } /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE @@ -5544,7 +5687,14 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, ForCodeSize, Depth + 1); char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, ForCodeSize, Depth + 1); - if (V0 >= V1) { + // TODO: This is a hack. It is possible that costs have changed between now + // and the initial calls to isNegatibleForFree(). That is because we + // are rewriting the expression, and that may change the number of + // uses (and therefore the cost) of values. If the negation costs are + // equal, only negate this value if it is a constant. Otherwise, try + // operand 1. A better fix would eliminate uses as a cost factor or + // track the change in uses as we rewrite the expression. + if (V0 > V1 || (V0 == V1 && isa<ConstantFPSDNode>(Op.getOperand(0)))) { // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) SDValue Neg0 = getNegatedExpression( Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1); @@ -5954,6 +6104,8 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, EVT DstVT = Node->getValueType(0); EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); + EVT DstSetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT); // Only expand vector types if we have the appropriate vector bit operations. unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT : @@ -5980,7 +6132,15 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, } SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); - SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT); + SDValue Sel; + + if (Node->isStrictFPOpcode()) { + Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, + Node->getOperand(0), /*IsSignaling*/ true); + Chain = Sel.getValue(1); + } else { + Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT); + } bool Strict = Node->isStrictFPOpcode() || shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false); @@ -5989,28 +6149,29 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, // Expand based on maximum range of FP_TO_SINT, if the value exceeds the // signmask then offset (the result of which should be fully representable). // Sel = Src < 0x8000000000000000 - // Val = select Sel, Src, Src - 0x8000000000000000 - // Ofs = select Sel, 0, 0x8000000000000000 - // Result = fp_to_sint(Val) ^ Ofs + // FltOfs = select Sel, 0, 0x8000000000000000 + // IntOfs = select Sel, 0, 0x8000000000000000 + // Result = fp_to_sint(Src - FltOfs) ^ IntOfs // TODO: Should any fast-math-flags be set for the FSUB? - SDValue SrcBiased; - if (Node->isStrictFPOpcode()) - SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, - { Node->getOperand(0), Src, Cst }); - else - SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst); - SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased); - SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), - DAG.getConstant(SignMask, dl, DstVT)); + SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel, + DAG.getConstantFP(0.0, dl, SrcVT), Cst); + Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT); + SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel, + DAG.getConstant(0, dl, DstVT), + DAG.getConstant(SignMask, dl, DstVT)); SDValue SInt; if (Node->isStrictFPOpcode()) { + SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, + { Chain, Src, FltOfs }); SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, - { SrcBiased.getValue(1), Val }); + { Val.getValue(1), Val }); Chain = SInt.getValue(1); - } else + } else { + SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs); SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val); - Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs); + } + Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs); } else { // Expand based on maximum range of FP_TO_SINT: // True = fp_to_sint(Src) @@ -6023,14 +6184,17 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst)); False = DAG.getNode(ISD::XOR, dl, DstVT, False, DAG.getConstant(SignMask, dl, DstVT)); + Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT); Result = DAG.getSelect(dl, DstVT, Sel, True, False); } return true; } bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, + SDValue &Chain, SelectionDAG &DAG) const { - SDValue Src = Node->getOperand(0); + unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; + SDValue Src = Node->getOperand(OpNo); EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); @@ -6052,17 +6216,7 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, return false; // For unsigned conversions, convert them to signed conversions using the - // algorithm from the x86_64 __floatundidf in compiler_rt. - SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src); - - SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT); - SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst); - SDValue AndConst = DAG.getConstant(1, dl, SrcVT); - SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst); - SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr); - - SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or); - SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt); + // algorithm from the x86_64 __floatundisf in compiler_rt. // TODO: This really should be implemented using a branch rather than a // select. We happen to get lucky and machinesink does the right @@ -6073,6 +6227,37 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, SDValue SignBitTest = DAG.getSetCC( dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT); + + SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT); + SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst); + SDValue AndConst = DAG.getConstant(1, dl, SrcVT); + SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst); + SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr); + + SDValue Slow, Fast; + if (Node->isStrictFPOpcode()) { + // In strict mode, we must avoid spurious exceptions, and therefore + // must make sure to only emit a single STRICT_SINT_TO_FP. + SDValue InCvt = DAG.getSelect(dl, SrcVT, SignBitTest, Or, Src); + Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DstVT, MVT::Other }, + { Node->getOperand(0), InCvt }); + Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DstVT, MVT::Other }, + { Fast.getValue(1), Fast, Fast }); + Chain = Slow.getValue(1); + // The STRICT_SINT_TO_FP inherits the exception mode from the + // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can + // never raise any exception. + SDNodeFlags Flags; + Flags.setNoFPExcept(Node->getFlags().hasNoFPExcept()); + Fast->setFlags(Flags); + Flags.setNoFPExcept(true); + Slow->setFlags(Flags); + } else { + SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or); + Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt); + Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src); + } + Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast); return true; } @@ -6105,8 +6290,18 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84); SDValue LoFlt = DAG.getBitcast(DstVT, LoOr); SDValue HiFlt = DAG.getBitcast(DstVT, HiOr); - SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52); - Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub); + if (Node->isStrictFPOpcode()) { + SDValue HiSub = + DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other}, + {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52}); + Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other}, + {HiSub.getValue(1), LoFlt, HiSub}); + Chain = Result.getValue(1); + } else { + SDValue HiSub = + DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52); + Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub); + } return true; } @@ -6150,6 +6345,26 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, } } + // If none of the above worked, but there are no NaNs, then expand to + // a compare/select sequence. This is required for correctness since + // InstCombine might have canonicalized a fcmp+select sequence to a + // FMINNUM/FMAXNUM node. If we were to fall through to the default + // expansion to libcall, we might introduce a link-time dependency + // on libm into a file that originally did not have one. + if (Node->getFlags().hasNoNaNs()) { + ISD::CondCode Pred = + Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT; + SDValue Op1 = Node->getOperand(0); + SDValue Op2 = Node->getOperand(1); + SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred); + // Copy FMF flags, but always set the no-signed-zeros flag + // as this is implied by the FMINNUM/FMAXNUM semantics. + SDNodeFlags Flags = Node->getFlags(); + Flags.setNoSignedZeros(true); + SelCC->setFlags(Flags); + return SelCC; + } + return SDValue(); } @@ -6342,8 +6557,9 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result, return true; } -SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, - SelectionDAG &DAG) const { +std::pair<SDValue, SDValue> +TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, + SelectionDAG &DAG) const { SDLoc SL(LD); SDValue Chain = LD->getChain(); SDValue BasePTR = LD->getBasePtr(); @@ -6377,7 +6593,7 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains); SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals); - return DAG.getMergeValues({Value, NewChain}, SL); + return std::make_pair(Value, NewChain); } SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, @@ -6471,10 +6687,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { if (!isOperationLegalOrCustom(ISD::LOAD, intVT) && LoadedVT.isVector()) { // Scalarize the load and let the individual components be handled. - SDValue Scalarized = scalarizeVectorLoad(LD, DAG); - if (Scalarized->getOpcode() == ISD::MERGE_VALUES) - return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1)); - return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1)); + return scalarizeVectorLoad(LD, DAG); } // Expand to a (misaligned) integer load of the same size, @@ -6807,7 +7020,7 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG, Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index, DAG.getConstant(EltSize, dl, IdxVT)); - return DAG.getNode(ISD::ADD, dl, IdxVT, VecPtr, Index); + return DAG.getMemBasePlusOffset(VecPtr, Index, dl); } //===----------------------------------------------------------------------===// @@ -7096,6 +7309,86 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { return Result; } +SDValue +TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, + SDValue LHS, SDValue RHS, + unsigned Scale, SelectionDAG &DAG) const { + assert((Opcode == ISD::SDIVFIX || + Opcode == ISD::UDIVFIX) && + "Expected a fixed point division opcode"); + + EVT VT = LHS.getValueType(); + bool Signed = Opcode == ISD::SDIVFIX; + EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + + // If there is enough room in the type to upscale the LHS or downscale the + // RHS before the division, we can perform it in this type without having to + // resize. For signed operations, the LHS headroom is the number of + // redundant sign bits, and for unsigned ones it is the number of zeroes. + // The headroom for the RHS is the number of trailing zeroes. + unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1 + : DAG.computeKnownBits(LHS).countMinLeadingZeros(); + unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros(); + + if (LHSLead + RHSTrail < Scale) + return SDValue(); + + unsigned LHSShift = std::min(LHSLead, Scale); + unsigned RHSShift = Scale - LHSShift; + + // At this point, we know that if we shift the LHS up by LHSShift and the + // RHS down by RHSShift, we can emit a regular division with a final scaling + // factor of Scale. + + EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout()); + if (LHSShift) + LHS = DAG.getNode(ISD::SHL, dl, VT, LHS, + DAG.getConstant(LHSShift, dl, ShiftTy)); + if (RHSShift) + RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS, + DAG.getConstant(RHSShift, dl, ShiftTy)); + + SDValue Quot; + if (Signed) { + // For signed operations, if the resulting quotient is negative and the + // remainder is nonzero, subtract 1 from the quotient to round towards + // negative infinity. + SDValue Rem; + // FIXME: Ideally we would always produce an SDIVREM here, but if the + // type isn't legal, SDIVREM cannot be expanded. There is no reason why + // we couldn't just form a libcall, but the type legalizer doesn't do it. + if (isTypeLegal(VT) && + isOperationLegalOrCustom(ISD::SDIVREM, VT)) { + Quot = DAG.getNode(ISD::SDIVREM, dl, + DAG.getVTList(VT, VT), + LHS, RHS); + Rem = Quot.getValue(1); + Quot = Quot.getValue(0); + } else { + Quot = DAG.getNode(ISD::SDIV, dl, VT, + LHS, RHS); + Rem = DAG.getNode(ISD::SREM, dl, VT, + LHS, RHS); + } + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE); + SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT); + SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT); + SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg); + SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot, + DAG.getConstant(1, dl, VT)); + Quot = DAG.getSelect(dl, VT, + DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg), + Sub1, Quot); + } else + Quot = DAG.getNode(ISD::UDIV, dl, VT, + LHS, RHS); + + // TODO: Saturation. + + return Quot; +} + void TargetLowering::expandUADDSUBO( SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const { SDLoc dl(Node); |