Diffstat (limited to 'lib/CodeGen/SelectionDAG/SelectionDAG.cpp')
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1429
1 file changed, 844 insertions, 585 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 647496c1afcb..5852e693fa9f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1,9 +1,8 @@ //===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -86,6 +85,7 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { // Default null implementations of the callbacks. void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {} void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} +void SelectionDAG::DAGUpdateListener::NodeInserted(SDNode *) {} void SelectionDAG::DAGNodeDeletedListener::anchor() {} @@ -262,12 +262,7 @@ bool ISD::allOperandsUndef(const SDNode *N) { // is probably the desired behavior. if (N->getNumOperands() == 0) return false; - - for (const SDValue &Op : N->op_values()) - if (!Op.isUndef()) - return false; - - return true; + return all_of(N->op_values(), [](SDValue Op) { return Op.isUndef(); }); } bool ISD::matchUnaryPredicate(SDValue Op, @@ -299,8 +294,8 @@ bool ISD::matchUnaryPredicate(SDValue Op, bool ISD::matchBinaryPredicate( SDValue LHS, SDValue RHS, std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match, - bool AllowUndefs) { - if (LHS.getValueType() != RHS.getValueType()) + bool AllowUndefs, bool AllowTypeMismatch) { + if (!AllowTypeMismatch && LHS.getValueType() != RHS.getValueType()) return false; // TODO: Add support for scalar UNDEF cases? @@ -323,8 +318,8 @@ bool ISD::matchBinaryPredicate( auto *RHSCst = dyn_cast<ConstantSDNode>(RHSOp); if ((!LHSCst && !LHSUndef) || (!RHSCst && !RHSUndef)) return false; - if (LHSOp.getValueType() != SVT || - LHSOp.getValueType() != RHSOp.getValueType()) + if (!AllowTypeMismatch && (LHSOp.getValueType() != SVT || + LHSOp.getValueType() != RHSOp.getValueType())) return false; if (!Match(LHSCst, RHSCst)) return false; @@ -518,6 +513,13 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { case ISD::TargetFrameIndex: ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex()); break; + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: + if (cast<LifetimeSDNode>(N)->hasOffset()) { + ID.AddInteger(cast<LifetimeSDNode>(N)->getSize()); + ID.AddInteger(cast<LifetimeSDNode>(N)->getOffset()); + } + break; case ISD::JumpTable: case ISD::TargetJumpTable: ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex()); @@ -834,6 +836,8 @@ void SelectionDAG::InsertNode(SDNode *N) { N->PersistentId = NextPersistentId++; VerifySDNode(N); #endif + for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) + DUL->NodeInserted(N); } /// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that @@ -1136,6 +1140,18 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { getConstant(Imm, DL, Op.getValueType())); } +SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { + // Only unsigned pointer semantics are supported right now. In the future this + // might delegate to TLI to check pointer signedness. 
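// (A usage sketch, not part of this patch: a caller lowering pointer
// arithmetic can write getPtrExtOrTrunc(Ptr, DL, NewVT) instead of
// hard-coding getZExtOrTrunc, and will pick up any future signed-pointer
// handling automatically.)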
+ return getZExtOrTrunc(Op, DL, VT); +} + +SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { + // Only unsigned pointer semantics are supported right now. In the future this + // might delegate to TLI to check pointer signedness. + return getZeroExtendInReg(Op, DL, VT); +} + /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) { EVT EltVT = VT.getScalarType(); @@ -1274,6 +1290,12 @@ SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL, return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget); } +SDValue SelectionDAG::getShiftAmountConstant(uint64_t Val, EVT VT, + const SDLoc &DL, bool LegalTypes) { + EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout(), LegalTypes); + return getConstant(Val, DL, ShiftVT); +} + SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT, bool isTarget) { return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget); @@ -1403,7 +1425,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = MF->getFunction().optForSize() + Alignment = MF->getFunction().hasOptSize() ? getDataLayout().getABITypeAlignment(C->getType()) : getDataLayout().getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; @@ -1770,7 +1792,8 @@ SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - auto *N = newSDNode<LabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label); + auto *N = + newSDNode<LabelSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), Label); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -1965,10 +1988,30 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, case ISD::SETUO: case ISD::SETUEQ: case ISD::SETUNE: - assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!"); + assert(!OpVT.isInteger() && "Illegal setcc for integer!"); break; } + if (OpVT.isInteger()) { + // For EQ and NE, we can always pick a value for the undef to make the + // predicate pass or fail, so we can return undef. + // Matches behavior in llvm::ConstantFoldCompareInstruction. + // icmp eq/ne X, undef -> undef. + if ((N1.isUndef() || N2.isUndef()) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) + return getUNDEF(VT); + + // If both operands are undef, we can return undef for int comparison. + // icmp undef, undef -> undef. + if (N1.isUndef() && N2.isUndef()) + return getUNDEF(VT); + + // icmp X, X -> true/false + // icmp X, undef -> true/false because undef could be X. 
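// For example, (setcc eq X, X) folds to true and (setcc ugt X, X) to false,
// since isTrueWhenEqual is true for EQ and false for UGT.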
+ if (N1 == N2) + return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT); + } + if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) { const APInt &C2 = N2C->getAPIntValue(); if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) { @@ -1989,71 +2032,88 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, } } } - if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1)) { - if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2)) { - APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF()); - switch (Cond) { - default: break; - case ISD::SETEQ: if (R==APFloat::cmpUnordered) - return getUNDEF(VT); - LLVM_FALLTHROUGH; - case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT, - OpVT); - case ISD::SETNE: if (R==APFloat::cmpUnordered) - return getUNDEF(VT); - LLVM_FALLTHROUGH; - case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpLessThan, dl, VT, - OpVT); - case ISD::SETLT: if (R==APFloat::cmpUnordered) - return getUNDEF(VT); - LLVM_FALLTHROUGH; - case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT, - OpVT); - case ISD::SETGT: if (R==APFloat::cmpUnordered) - return getUNDEF(VT); - LLVM_FALLTHROUGH; - case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl, - VT, OpVT); - case ISD::SETLE: if (R==APFloat::cmpUnordered) - return getUNDEF(VT); - LLVM_FALLTHROUGH; - case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan || - R==APFloat::cmpEqual, dl, VT, - OpVT); - case ISD::SETGE: if (R==APFloat::cmpUnordered) - return getUNDEF(VT); - LLVM_FALLTHROUGH; - case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpEqual, dl, VT, OpVT); - case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT, - OpVT); - case ISD::SETUO: return getBoolConstant(R==APFloat::cmpUnordered, dl, VT, - OpVT); - case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered || - R==APFloat::cmpEqual, dl, VT, - OpVT); - case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT, - OpVT); - case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered || - R==APFloat::cmpLessThan, dl, VT, - OpVT); - case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpUnordered, dl, VT, - OpVT); - case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl, - VT, OpVT); - case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT, - OpVT); - } - } else { - // Ensure that the constant occurs on the RHS. 
- ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond); - MVT CompVT = N1.getValueType().getSimpleVT(); - if (!TLI->isCondCodeLegal(SwappedCond, CompVT)) - return SDValue(); - return getSetCC(dl, VT, N2, N1, SwappedCond); + auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2); + + if (N1CFP && N2CFP) { + APFloat::cmpResult R = N1CFP->getValueAPF().compare(N2CFP->getValueAPF()); + switch (Cond) { + default: break; + case ISD::SETEQ: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + LLVM_FALLTHROUGH; + case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT, + OpVT); + case ISD::SETNE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + LLVM_FALLTHROUGH; + case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpLessThan, dl, VT, + OpVT); + case ISD::SETLT: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + LLVM_FALLTHROUGH; + case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT, + OpVT); + case ISD::SETGT: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + LLVM_FALLTHROUGH; + case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl, + VT, OpVT); + case ISD::SETLE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + LLVM_FALLTHROUGH; + case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan || + R==APFloat::cmpEqual, dl, VT, + OpVT); + case ISD::SETGE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + LLVM_FALLTHROUGH; + case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpEqual, dl, VT, OpVT); + case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT, + OpVT); + case ISD::SETUO: return getBoolConstant(R==APFloat::cmpUnordered, dl, VT, + OpVT); + case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpEqual, dl, VT, + OpVT); + case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT, + OpVT); + case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpLessThan, dl, VT, + OpVT); + case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpUnordered, dl, VT, + OpVT); + case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl, + VT, OpVT); + case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT, + OpVT); + } + } else if (N1CFP && OpVT.isSimple() && !N2.isUndef()) { + // Ensure that the constant occurs on the RHS. + ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond); + if (!TLI->isCondCodeLegal(SwappedCond, OpVT.getSimpleVT())) + return SDValue(); + return getSetCC(dl, VT, N2, N1, SwappedCond); + } else if ((N2CFP && N2CFP->getValueAPF().isNaN()) || + (OpVT.isFloatingPoint() && (N1.isUndef() || N2.isUndef()))) { + // If an operand is known to be a nan (or undef that could be a nan), we can + // fold it. + // Choosing NaN for the undef will always make unordered comparison succeed + // and ordered comparison fails. + // Matches behavior in llvm::ConstantFoldCompareInstruction. + switch (ISD::getUnorderedFlavor(Cond)) { + default: + llvm_unreachable("Unknown flavor!"); + case 0: // Known false. + return getBoolConstant(false, dl, VT, OpVT); + case 1: // Known true. + return getBoolConstant(true, dl, VT, OpVT); + case 2: // Undefined. 
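// e.g. with a NaN operand: SETOLT is known false (flavor 0), SETULT is known
// true (flavor 1), and the no-ordering-preference SETLT may fold to undef
// (flavor 2).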
+ return getUNDEF(VT); } } @@ -2062,16 +2122,32 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, } /// See if the specified operand can be simplified with the knowledge that only -/// the bits specified by Mask are used. -SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) { +/// the bits specified by DemandedBits are used. +/// TODO: really we should be making this into the DAG equivalent of +/// SimplifyMultipleUseDemandedBits and not generate any new nodes. +SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) { + EVT VT = V.getValueType(); + APInt DemandedElts = VT.isVector() + ? APInt::getAllOnesValue(VT.getVectorNumElements()) + : APInt(1, 1); + return GetDemandedBits(V, DemandedBits, DemandedElts); +} + +/// See if the specified operand can be simplified with the knowledge that only +/// the bits specified by DemandedBits are used in the elements specified by +/// DemandedElts. +/// TODO: really we should be making this into the DAG equivalent of +/// SimplifyMultipleUseDemandedBits and not generate any new nodes. +SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, + const APInt &DemandedElts) { switch (V.getOpcode()) { default: break; case ISD::Constant: { - const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); + auto *CV = cast<ConstantSDNode>(V.getNode()); assert(CV && "Const value should be ConstSDNode."); const APInt &CVal = CV->getAPIntValue(); - APInt NewVal = CVal & Mask; + APInt NewVal = CVal & DemandedBits; if (NewVal != CVal) return getConstant(NewVal, SDLoc(V), V.getValueType()); break; @@ -2079,44 +2155,51 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) { case ISD::OR: case ISD::XOR: // If the LHS or RHS don't contribute bits to the or, drop them. - if (MaskedValueIsZero(V.getOperand(0), Mask)) + if (MaskedValueIsZero(V.getOperand(0), DemandedBits)) return V.getOperand(1); - if (MaskedValueIsZero(V.getOperand(1), Mask)) + if (MaskedValueIsZero(V.getOperand(1), DemandedBits)) return V.getOperand(0); break; case ISD::SRL: // Only look at single-use SRLs. if (!V.getNode()->hasOneUse()) break; - if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { + if (auto *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { // See if we can recursively simplify the LHS. unsigned Amt = RHSC->getZExtValue(); // Watch out for shift count overflow though. - if (Amt >= Mask.getBitWidth()) + if (Amt >= DemandedBits.getBitWidth()) break; - APInt NewMask = Mask << Amt; - if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask)) + APInt SrcDemandedBits = DemandedBits << Amt; + if (SDValue SimplifyLHS = + GetDemandedBits(V.getOperand(0), SrcDemandedBits)) return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS, V.getOperand(1)); } break; case ISD::AND: { // X & -1 -> X (ignoring bits which aren't demanded). - ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1)); - if (AndVal && Mask.isSubsetOf(AndVal->getAPIntValue())) - return V.getOperand(0); + // Also handle the case where masked out bits in X are known to be zero. 
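// e.g. with DemandedBits = 0x0F, (and X, 0xFF) simplifies to X outright, and
// (and X, 0x0E) simplifies to X when bit 0 of X is known to be zero.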
+ if (ConstantSDNode *RHSC = isConstOrConstSplat(V.getOperand(1))) { + const APInt &AndVal = RHSC->getAPIntValue(); + if (DemandedBits.isSubsetOf(AndVal) || + DemandedBits.isSubsetOf(computeKnownBits(V.getOperand(0)).Zero | + AndVal)) + return V.getOperand(0); + } break; } case ISD::ANY_EXTEND: { SDValue Src = V.getOperand(0); unsigned SrcBitWidth = Src.getScalarValueSizeInBits(); // Being conservative here - only peek through if we only demand bits in the - // non-extended source (even though the extended bits are technically undef). - if (Mask.getActiveBits() > SrcBitWidth) + // non-extended source (even though the extended bits are technically + // undef). + if (DemandedBits.getActiveBits() > SrcBitWidth) break; - APInt SrcMask = Mask.trunc(SrcBitWidth); - if (SDValue DemandedSrc = GetDemandedBits(Src, SrcMask)) + APInt SrcDemandedBits = DemandedBits.trunc(SrcBitWidth); + if (SDValue DemandedSrc = GetDemandedBits(Src, SrcDemandedBits)) return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc); break; } @@ -2125,7 +2208,7 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) { unsigned ExVTBits = ExVT.getScalarSizeInBits(); // If none of the extended bits are demanded, eliminate the sextinreg. - if (Mask.getActiveBits() <= ExVTBits) + if (DemandedBits.getActiveBits() <= ExVTBits) return V.getOperand(0); break; @@ -2143,9 +2226,28 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use /// this predicate to simplify operations downstream. Mask is known to be zero /// for bits that V cannot have. -bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, +bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask, + unsigned Depth) const { + EVT VT = V.getValueType(); + APInt DemandedElts = VT.isVector() + ? APInt::getAllOnesValue(VT.getVectorNumElements()) + : APInt(1, 1); + return MaskedValueIsZero(V, Mask, DemandedElts, Depth); +} + +/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero in +/// DemandedElts. We use this predicate to simplify operations downstream. +/// Mask is known to be zero for bits that V cannot have. +bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask, + const APInt &DemandedElts, unsigned Depth) const { - return Mask.isSubsetOf(computeKnownBits(Op, Depth).Zero); + return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero); +} + +/// MaskedValueIsAllOnes - Return true if '(Op & Mask) == Mask'. +bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask, + unsigned Depth) const { + return Mask.isSubsetOf(computeKnownBits(V, Depth).One); } /// isSplatValue - Return true if the vector V has the same value @@ -2244,28 +2346,50 @@ bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) { (AllowUndefs || !UndefElts); } -/// Helper function that checks to see if a node is a constant or a -/// build vector of splat constants at least within the demanded elts. 
-static ConstantSDNode *isConstOrDemandedConstSplat(SDValue N, - const APInt &DemandedElts) { - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) - return CN; - if (N.getOpcode() != ISD::BUILD_VECTOR) - return nullptr; - EVT VT = N.getValueType(); - ConstantSDNode *Cst = nullptr; - unsigned NumElts = VT.getVectorNumElements(); - assert(DemandedElts.getBitWidth() == NumElts && "Unexpected vector size"); - for (unsigned i = 0; i != NumElts; ++i) { - if (!DemandedElts[i]) - continue; - ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(i)); - if (!C || (Cst && Cst->getAPIntValue() != C->getAPIntValue()) || - C->getValueType(0) != VT.getScalarType()) - return nullptr; - Cst = C; +SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) { + V = peekThroughExtractSubvectors(V); + + EVT VT = V.getValueType(); + unsigned Opcode = V.getOpcode(); + switch (Opcode) { + default: { + APInt UndefElts; + APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); + if (isSplatValue(V, DemandedElts, UndefElts)) { + // Handle case where all demanded elements are UNDEF. + if (DemandedElts.isSubsetOf(UndefElts)) { + SplatIdx = 0; + return getUNDEF(VT); + } + SplatIdx = (UndefElts & DemandedElts).countTrailingOnes(); + return V; + } + break; + } + case ISD::VECTOR_SHUFFLE: { + // Check if this is a shuffle node doing a splat. + // TODO - remove this and rely purely on SelectionDAG::isSplatValue, + // getTargetVShiftNode currently struggles without the splat source. + auto *SVN = cast<ShuffleVectorSDNode>(V); + if (!SVN->isSplat()) + break; + int Idx = SVN->getSplatIndex(); + int NumElts = V.getValueType().getVectorNumElements(); + SplatIdx = Idx % NumElts; + return V.getOperand(Idx / NumElts); } - return Cst; + } + + return SDValue(); +} + +SDValue SelectionDAG::getSplatValue(SDValue V) { + int SplatIdx; + if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx)) + return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), + SrcVector.getValueType().getScalarType(), SrcVector, + getIntPtrConstant(SplatIdx, SDLoc(V))); + return SDValue(); } /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that @@ -2708,8 +2832,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; case ISD::FSHL: case ISD::FSHR: - if (ConstantSDNode *C = - isConstOrDemandedConstSplat(Op.getOperand(2), DemandedElts)) { + if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(2), DemandedElts)) { unsigned Amt = C->getAPIntValue().urem(BitWidth); // For fshl, 0-shift returns the 1st arg. @@ -2801,8 +2924,59 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } case ISD::LOAD: { LoadSDNode *LD = cast<LoadSDNode>(Op); - // If this is a ZEXTLoad and we are looking at the loaded value. - if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { + const Constant *Cst = TLI->getTargetConstantFromLoad(LD); + if (ISD::isNON_EXTLoad(LD) && Cst) { + // Determine any common known bits from the loaded constant pool value. + Type *CstTy = Cst->getType(); + if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits()) { + // If its a vector splat, then we can (quickly) reuse the scalar path. + // NOTE: We assume all elements match and none are UNDEF. + if (CstTy->isVectorTy()) { + if (const Constant *Splat = Cst->getSplatValue()) { + Cst = Splat; + CstTy = Cst->getType(); + } + } + // TODO - do we need to handle different bitwidths? 
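// The loop below intersects the known bits of all demanded elements: e.g.
// constant elements 0x3 and 0x1 leave bit 0 known-one, bit 1 unknown, and
// the remaining high bits known-zero.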
+ if (CstTy->isVectorTy() && BitWidth == CstTy->getScalarSizeInBits()) { + // Iterate across all vector elements finding common known bits. + Known.One.setAllBits(); + Known.Zero.setAllBits(); + for (unsigned i = 0; i != NumElts; ++i) { + if (!DemandedElts[i]) + continue; + if (Constant *Elt = Cst->getAggregateElement(i)) { + if (auto *CInt = dyn_cast<ConstantInt>(Elt)) { + const APInt &Value = CInt->getValue(); + Known.One &= Value; + Known.Zero &= ~Value; + continue; + } + if (auto *CFP = dyn_cast<ConstantFP>(Elt)) { + APInt Value = CFP->getValueAPF().bitcastToAPInt(); + Known.One &= Value; + Known.Zero &= ~Value; + continue; + } + } + Known.One.clearAllBits(); + Known.Zero.clearAllBits(); + break; + } + } else if (BitWidth == CstTy->getPrimitiveSizeInBits()) { + if (auto *CInt = dyn_cast<ConstantInt>(Cst)) { + const APInt &Value = CInt->getValue(); + Known.One = Value; + Known.Zero = ~Value; + } else if (auto *CFP = dyn_cast<ConstantFP>(Cst)) { + APInt Value = CFP->getValueAPF().bitcastToAPInt(); + Known.One = Value; + Known.Zero = ~Value; + } + } + } + } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { + // If this is a ZEXTLoad and we are looking at the loaded value. EVT VT = LD->getMemoryVT(); unsigned MemBits = VT.getScalarSizeInBits(); Known.Zero.setBitsFrom(MemBits); @@ -2816,15 +2990,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, EVT InVT = Op.getOperand(0).getValueType(); APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements()); Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); - Known = Known.zext(BitWidth); - Known.Zero.setBitsFrom(InVT.getScalarSizeInBits()); + Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); break; } case ISD::ZERO_EXTEND: { - EVT InVT = Op.getOperand(0).getValueType(); Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - Known = Known.zext(BitWidth); - Known.Zero.setBitsFrom(InVT.getScalarSizeInBits()); + Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); break; } case ISD::SIGN_EXTEND_VECTOR_INREG: { @@ -2845,7 +3016,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } case ISD::ANY_EXTEND: { Known = computeKnownBits(Op.getOperand(0), Depth+1); - Known = Known.zext(BitWidth); + Known = Known.zext(BitWidth, false /* ExtendedBitsAreKnownZero */); break; } case ISD::TRUNCATE: { @@ -2878,39 +3049,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, LLVM_FALLTHROUGH; case ISD::SUB: case ISD::SUBC: { - if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) { - // We know that the top bits of C-X are clear if X contains less bits - // than C (i.e. no wrap-around can happen). For example, 20-X is - // positive if we can prove that X is >= 0 and < 16. - if (CLHS->getAPIntValue().isNonNegative()) { - unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); - // NLZ can't be BitWidth with no sign bit - APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, - Depth + 1); - - // If all of the MaskV bits are known to be zero, then we know the - // output top bits are zero, because we now know that the output is - // from [0-C]. - if ((Known2.Zero & MaskV) == MaskV) { - unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); - // Top bits known zero. 
- Known.Zero.setHighBits(NLZ2); - } - } - } - - // If low bits are know to be zero in both operands, then we know they are - // going to be 0 in the result. Both addition and complement operations - // preserve the low zero bits. - Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - unsigned KnownZeroLow = Known2.countMinTrailingZeros(); - if (KnownZeroLow == 0) - break; - + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); - Known.Zero.setLowBits(KnownZeroLow); + Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false, + Known, Known2); break; } case ISD::UADDO: @@ -2928,34 +3070,26 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, case ISD::ADD: case ISD::ADDC: case ISD::ADDE: { - // Output known-0 bits are known if clear or set in both the low clear bits - // common to both LHS & RHS. For example, 8+(X<<3) is known to have the - // low 3 bits clear. - // Output known-0 bits are also known if the top bits of each input are - // known to be clear. For example, if one input has the top 10 bits clear - // and the other has the top 8 bits clear, we know the top 7 bits of the - // output must be clear. - Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - unsigned KnownZeroHigh = Known2.countMinLeadingZeros(); - unsigned KnownZeroLow = Known2.countMinTrailingZeros(); + assert(Op.getResNo() == 0 && "We only compute knownbits for the sum here."); + + // With ADDE and ADDCARRY, a carry bit may be added in. + KnownBits Carry(1); + if (Opcode == ISD::ADDE) + // Can't track carry from glue, set carry to unknown. + Carry.resetAll(); + else if (Opcode == ISD::ADDCARRY) + // TODO: Compute known bits for the carry operand. Not sure if it is worth + // the trouble (how often will we find a known carry bit). And I haven't + // tested this very much yet, but something like this might work: + // Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); + // Carry = Carry.zextOrTrunc(1, false); + Carry.resetAll(); + else + Carry.setAllZero(); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros()); - KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); - - if (Opcode == ISD::ADDE || Opcode == ISD::ADDCARRY) { - // With ADDE and ADDCARRY, a carry bit may be added in, so we can only - // use this information if we know (at least) that the low two bits are - // clear. We then return to the caller that the low bit is unknown but - // that other bits are known zero. 
- if (KnownZeroLow >= 2) - Known.Zero.setBits(1, KnownZeroLow); - break; - } - - Known.Zero.setLowBits(KnownZeroLow); - if (KnownZeroHigh > 1) - Known.Zero.setHighBits(KnownZeroHigh - 1); + Known = KnownBits::computeForAddCarry(Known, Known2, Carry); break; } case ISD::SREM: @@ -3010,21 +3144,20 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, case ISD::EXTRACT_ELEMENT: { Known = computeKnownBits(Op.getOperand(0), Depth+1); const unsigned Index = Op.getConstantOperandVal(1); - const unsigned BitWidth = Op.getValueSizeInBits(); + const unsigned EltBitWidth = Op.getValueSizeInBits(); // Remove low part of known bits mask - Known.Zero = Known.Zero.getHiBits(Known.Zero.getBitWidth() - Index * BitWidth); - Known.One = Known.One.getHiBits(Known.One.getBitWidth() - Index * BitWidth); + Known.Zero = Known.Zero.getHiBits(Known.getBitWidth() - Index * EltBitWidth); + Known.One = Known.One.getHiBits(Known.getBitWidth() - Index * EltBitWidth); // Remove high part of known bit mask - Known = Known.trunc(BitWidth); + Known = Known.trunc(EltBitWidth); break; } case ISD::EXTRACT_VECTOR_ELT: { SDValue InVec = Op.getOperand(0); SDValue EltNo = Op.getOperand(1); EVT VecVT = InVec.getValueType(); - const unsigned BitWidth = Op.getValueSizeInBits(); const unsigned EltBitWidth = VecVT.getScalarSizeInBits(); const unsigned NumSrcElts = VecVT.getVectorNumElements(); // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know @@ -3042,7 +3175,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = computeKnownBits(InVec, Depth + 1); } if (BitWidth > EltBitWidth) - Known = Known.zext(BitWidth); + Known = Known.zext(BitWidth, false /* => any extend */); break; } case ISD::INSERT_VECTOR_ELT: { @@ -3146,10 +3279,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, // the minimum of the clamp min/max range. bool IsMax = (Opcode == ISD::SMAX); ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; - if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts))) + if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts))) if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX)) - CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1), - DemandedElts); + CstHigh = + isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts); if (CstLow && CstHigh) { if (!IsMax) std::swap(CstLow, CstHigh); @@ -3430,7 +3563,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); // SRA X, C -> adds C sign bits. if (ConstantSDNode *C = - isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) { + isConstOrConstSplat(Op.getOperand(1), DemandedElts)) { APInt ShiftVal = C->getAPIntValue(); ShiftVal += Tmp; Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue(); @@ -3438,7 +3571,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return Tmp; case ISD::SHL: if (ConstantSDNode *C = - isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) { + isConstOrConstSplat(Op.getOperand(1), DemandedElts)) { // shl destroys sign bits. Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); if (C->getAPIntValue().uge(VTBits) || // Bad shift. @@ -3478,10 +3611,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // the minimum of the clamp min/max range. 
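// e.g. for i32, smax(smin(X, 127), -128) is clamped to [-128, 127]; both
// bounds have 25 sign bits, so the result has at least 25 sign bits.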
bool IsMax = (Opcode == ISD::SMAX); ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; - if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts))) + if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts))) if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX)) - CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1), - DemandedElts); + CstHigh = + isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts); if (CstLow && CstHigh) { if (!IsMax) std::swap(CstLow, CstHigh); @@ -3621,7 +3754,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, SDValue InVec = Op.getOperand(0); SDValue InVal = Op.getOperand(1); SDValue EltNo = Op.getOperand(2); - unsigned NumElts = InVec.getValueType().getVectorNumElements(); ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo); if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { @@ -3752,13 +3884,43 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { unsigned ExtType = LD->getExtensionType(); switch (ExtType) { - default: break; - case ISD::SEXTLOAD: // '17' bits known - Tmp = LD->getMemoryVT().getScalarSizeInBits(); - return VTBits-Tmp+1; - case ISD::ZEXTLOAD: // '16' bits known - Tmp = LD->getMemoryVT().getScalarSizeInBits(); - return VTBits-Tmp; + default: break; + case ISD::SEXTLOAD: // e.g. i16->i32 = '17' bits known. + Tmp = LD->getMemoryVT().getScalarSizeInBits(); + return VTBits - Tmp + 1; + case ISD::ZEXTLOAD: // e.g. i16->i32 = '16' bits known. + Tmp = LD->getMemoryVT().getScalarSizeInBits(); + return VTBits - Tmp; + case ISD::NON_EXTLOAD: + if (const Constant *Cst = TLI->getTargetConstantFromLoad(LD)) { + // We only need to handle vectors - computeKnownBits should handle + // scalar cases. + Type *CstTy = Cst->getType(); + if (CstTy->isVectorTy() && + (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits()) { + Tmp = VTBits; + for (unsigned i = 0; i != NumElts; ++i) { + if (!DemandedElts[i]) + continue; + if (Constant *Elt = Cst->getAggregateElement(i)) { + if (auto *CInt = dyn_cast<ConstantInt>(Elt)) { + const APInt &Value = CInt->getValue(); + Tmp = std::min(Tmp, Value.getNumSignBits()); + continue; + } + if (auto *CFP = dyn_cast<ConstantFP>(Elt)) { + APInt Value = CFP->getValueAPF().bitcastToAPInt(); + Tmp = std::min(Tmp, Value.getNumSignBits()); + continue; + } + } + // Unknown type. Conservatively assume no bits match sign bit. + return 1; + } + return Tmp; + } + } + break; } } } @@ -3803,8 +3965,7 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { return false; if (Op.getOpcode() == ISD::OR && - !MaskedValueIsZero(Op.getOperand(0), - cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue())) + !MaskedValueIsZero(Op.getOperand(0), Op.getConstantOperandAPInt(1))) return false; return true; @@ -4013,7 +4174,9 @@ static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT, return SDValue(); } -static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT, +/// Try to simplify vector concatenation to an input value, undef, or build +/// vector. 
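/// e.g. concat (extract X, 0), (extract X, 4), where X is v8i32 and each
/// extracted subvector is v4i32, folds back to X itself.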
+static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops, SelectionDAG &DAG) { assert(!Ops.empty() && "Can't concatenate an empty list of vectors!"); @@ -4033,6 +4196,31 @@ static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT, if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); })) return DAG.getUNDEF(VT); + // Scan the operands and look for extract operations from a single source + // that correspond to insertion at the same location via this concatenation: + // concat (extract X, 0*subvec_elts), (extract X, 1*subvec_elts), ... + SDValue IdentitySrc; + bool IsIdentity = true; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + SDValue Op = Ops[i]; + unsigned IdentityIndex = i * Op.getValueType().getVectorNumElements(); + if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR || + Op.getOperand(0).getValueType() != VT || + (IdentitySrc && Op.getOperand(0) != IdentitySrc) || + !isa<ConstantSDNode>(Op.getOperand(1)) || + Op.getConstantOperandVal(1) != IdentityIndex) { + IsIdentity = false; + break; + } + assert((!IdentitySrc || IdentitySrc == Op.getOperand(0)) && + "Unexpected identity source vector for concat of extracts"); + IdentitySrc = Op.getOperand(0); + } + if (IsIdentity) { + assert(IdentitySrc && "Failed to set source vector of extracts"); + return IdentitySrc; + } + // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be // simplified to one big BUILD_VECTOR. // FIXME: Add support for SCALAR_TO_VECTOR as well. @@ -4288,9 +4476,23 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (Operand.isUndef()) return getUNDEF(VT); break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + if (Operand.isUndef()) + return getUNDEF(VT); + break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + // [us]itofp(undef) = 0, because the result value is bounded. 
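// (The result of [us]itofp is always finite, never NaN or infinity, and 0.0
// lies in its range for every source width, so +0.0 is a valid choice.)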
+ if (Operand.isUndef()) + return getConstantFP(0.0, DL, VT); + break; case ISD::SIGN_EXTEND: assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid SIGN_EXTEND!"); + assert(VT.isVector() == Operand.getValueType().isVector() && + "SIGN_EXTEND result type type should be vector iff the operand " + "type is vector!"); if (Operand.getValueType() == VT) return Operand; // noop extension assert((!VT.isVector() || VT.getVectorNumElements() == @@ -4307,6 +4509,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::ZERO_EXTEND: assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid ZERO_EXTEND!"); + assert(VT.isVector() == Operand.getValueType().isVector() && + "ZERO_EXTEND result type type should be vector iff the operand " + "type is vector!"); if (Operand.getValueType() == VT) return Operand; // noop extension assert((!VT.isVector() || VT.getVectorNumElements() == @@ -4323,6 +4528,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::ANY_EXTEND: assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid ANY_EXTEND!"); + assert(VT.isVector() == Operand.getValueType().isVector() && + "ANY_EXTEND result type type should be vector iff the operand " + "type is vector!"); if (Operand.getValueType() == VT) return Operand; // noop extension assert((!VT.isVector() || VT.getVectorNumElements() == @@ -4350,6 +4558,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::TRUNCATE: assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid TRUNCATE!"); + assert(VT.isVector() == Operand.getValueType().isVector() && + "TRUNCATE result type type should be vector iff the operand " + "type is vector!"); if (Operand.getValueType() == VT) return Operand; // noop truncate assert((!VT.isVector() || VT.getVectorNumElements() == @@ -4429,6 +4640,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return Operand.getOperand(0); break; case ISD::FNEG: + // Negation of an unknown bag of bits is still completely undefined. 
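// (Unlike the [us]itofp case above, no constant is a safe pick here: fneg
// merely flips the sign bit of an arbitrary bag of bits.)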
+ if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) && OpOpcode == ISD::FSUB) @@ -4513,13 +4728,13 @@ static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1, } SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, - EVT VT, const ConstantSDNode *Cst1, - const ConstantSDNode *Cst2) { - if (Cst1->isOpaque() || Cst2->isOpaque()) + EVT VT, const ConstantSDNode *C1, + const ConstantSDNode *C2) { + if (C1->isOpaque() || C2->isOpaque()) return SDValue(); - std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(), - Cst2->getAPIntValue()); + std::pair<APInt, bool> Folded = FoldValue(Opcode, C1->getAPIntValue(), + C2->getAPIntValue()); if (!Folded.second) return SDValue(); return getConstant(Folded.first, DL, VT); @@ -4532,16 +4747,16 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT, return SDValue(); if (!TLI->isOffsetFoldingLegal(GA)) return SDValue(); - const ConstantSDNode *Cst2 = dyn_cast<ConstantSDNode>(N2); - if (!Cst2) + auto *C2 = dyn_cast<ConstantSDNode>(N2); + if (!C2) return SDValue(); - int64_t Offset = Cst2->getSExtValue(); + int64_t Offset = C2->getSExtValue(); switch (Opcode) { case ISD::ADD: break; case ISD::SUB: Offset = -uint64_t(Offset); break; default: return SDValue(); } - return getGlobalAddress(GA->getGlobal(), SDLoc(Cst2), VT, + return getGlobalAddress(GA->getGlobal(), SDLoc(C2), VT, GA->getOffset() + uint64_t(Offset)); } @@ -4571,21 +4786,20 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) { } SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, - EVT VT, SDNode *Cst1, - SDNode *Cst2) { + EVT VT, SDNode *N1, SDNode *N2) { // If the opcode is a target-specific ISD node, there's nothing we can // do here and the operand rules may not line up with the below, so // bail early. if (Opcode >= ISD::BUILTIN_OP_END) return SDValue(); - if (isUndef(Opcode, {SDValue(Cst1, 0), SDValue(Cst2, 0)})) + if (isUndef(Opcode, {SDValue(N1, 0), SDValue(N2, 0)})) return getUNDEF(VT); // Handle the case of two scalars. - if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) { - if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) { - SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2); + if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) { + if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) { + SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, C1, C2); assert((!Folded || !VT.isVector()) && "Can't fold vectors ops with scalar operands"); return Folded; @@ -4593,19 +4807,19 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, } // fold (add Sym, c) -> Sym+c - if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst1)) - return FoldSymbolOffset(Opcode, VT, GA, Cst2); + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N1)) + return FoldSymbolOffset(Opcode, VT, GA, N2); if (TLI->isCommutativeBinOp(Opcode)) - if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst2)) - return FoldSymbolOffset(Opcode, VT, GA, Cst1); + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2)) + return FoldSymbolOffset(Opcode, VT, GA, N1); // For vectors, extract each constant element and fold them individually. // Either input may be an undef value. 
- auto *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); - if (!BV1 && !Cst1->isUndef()) + auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); + if (!BV1 && !N1->isUndef()) return SDValue(); - auto *BV2 = dyn_cast<BuildVectorSDNode>(Cst2); - if (!BV2 && !Cst2->isUndef()) + auto *BV2 = dyn_cast<BuildVectorSDNode>(N2); + if (!BV2 && !N2->isUndef()) return SDValue(); // If both operands are undef, that's handled the same way as scalars. if (!BV1 && !BV2) @@ -4755,6 +4969,64 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, return V; } +SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL, + EVT VT, SDValue N1, SDValue N2) { + // TODO: We don't do any constant folding for strict FP opcodes here, but we + // should. That will require dealing with a potentially non-default + // rounding mode, checking the "opStatus" return value from the APFloat + // math calculations, and possibly other variations. + auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode()); + auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode()); + if (N1CFP && N2CFP) { + APFloat C1 = N1CFP->getValueAPF(), C2 = N2CFP->getValueAPF(); + switch (Opcode) { + case ISD::FADD: + C1.add(C2, APFloat::rmNearestTiesToEven); + return getConstantFP(C1, DL, VT); + case ISD::FSUB: + C1.subtract(C2, APFloat::rmNearestTiesToEven); + return getConstantFP(C1, DL, VT); + case ISD::FMUL: + C1.multiply(C2, APFloat::rmNearestTiesToEven); + return getConstantFP(C1, DL, VT); + case ISD::FDIV: + C1.divide(C2, APFloat::rmNearestTiesToEven); + return getConstantFP(C1, DL, VT); + case ISD::FREM: + C1.mod(C2); + return getConstantFP(C1, DL, VT); + case ISD::FCOPYSIGN: + C1.copySign(C2); + return getConstantFP(C1, DL, VT); + default: break; + } + } + if (N1CFP && Opcode == ISD::FP_ROUND) { + APFloat C1 = N1CFP->getValueAPF(); // make copy + bool Unused; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void) C1.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, + &Unused); + return getConstantFP(C1, DL, VT); + } + + switch (Opcode) { + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + // If both operands are undef, the result is undef. If 1 operand is undef, + // the result is NaN. This should match the behavior of the IR optimizer. + if (N1.isUndef() && N2.isUndef()) + return getUNDEF(VT); + if (N1.isUndef() || N2.isUndef()) + return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT); + } + return SDValue(); +} + SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, SDValue N2, const SDNodeFlags Flags) { ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); @@ -4791,9 +5063,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; } case ISD::CONCAT_VECTORS: { - // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF. SDValue Ops[] = {N1, N2}; - if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this)) + if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this)) return V; break; } @@ -4847,6 +5118,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); + if (SDValue V = simplifyFPBinop(Opcode, N1, N2)) + return V; break; case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match. 
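// e.g. (fcopysign f32 X, f64 Y) is accepted: only the sign bit of Y is
// consumed, so its width need not match the result type.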
assert(N1.getValueType() == VT && @@ -5100,73 +5373,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode())) return SV; - // Constant fold FP operations. - bool HasFPExceptions = TLI->hasFloatingPointExceptions(); - if (N1CFP) { - if (N2CFP) { - APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF(); - APFloat::opStatus s; - switch (Opcode) { - case ISD::FADD: - s = V1.add(V2, APFloat::rmNearestTiesToEven); - if (!HasFPExceptions || s != APFloat::opInvalidOp) - return getConstantFP(V1, DL, VT); - break; - case ISD::FSUB: - s = V1.subtract(V2, APFloat::rmNearestTiesToEven); - if (!HasFPExceptions || s!=APFloat::opInvalidOp) - return getConstantFP(V1, DL, VT); - break; - case ISD::FMUL: - s = V1.multiply(V2, APFloat::rmNearestTiesToEven); - if (!HasFPExceptions || s!=APFloat::opInvalidOp) - return getConstantFP(V1, DL, VT); - break; - case ISD::FDIV: - s = V1.divide(V2, APFloat::rmNearestTiesToEven); - if (!HasFPExceptions || (s!=APFloat::opInvalidOp && - s!=APFloat::opDivByZero)) { - return getConstantFP(V1, DL, VT); - } - break; - case ISD::FREM : - s = V1.mod(V2); - if (!HasFPExceptions || (s!=APFloat::opInvalidOp && - s!=APFloat::opDivByZero)) { - return getConstantFP(V1, DL, VT); - } - break; - case ISD::FCOPYSIGN: - V1.copySign(V2); - return getConstantFP(V1, DL, VT); - default: break; - } - } - - if (Opcode == ISD::FP_ROUND) { - APFloat V = N1CFP->getValueAPF(); // make copy - bool ignored; - // This can return overflow, underflow, or inexact; we don't care. - // FIXME need to be more flexible about rounding mode. - (void)V.convert(EVTToAPFloatSemantics(VT), - APFloat::rmNearestTiesToEven, &ignored); - return getConstantFP(V, DL, VT); - } - } - - switch (Opcode) { - case ISD::FADD: - case ISD::FSUB: - case ISD::FMUL: - case ISD::FDIV: - case ISD::FREM: - // If both operands are undef, the result is undef. If 1 operand is undef, - // the result is NaN. This should match the behavior of the IR optimizer. - if (N1.isUndef() && N2.isUndef()) - return getUNDEF(VT); - if (N1.isUndef() || N2.isUndef()) - return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT); - } + if (SDValue V = foldConstantFPMath(Opcode, DL, VT, N1, N2)) + return V; // Canonicalize an UNDEF to the RHS, even over a constant. if (N1.isUndef()) { @@ -5261,10 +5469,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, APFloat V1 = N1CFP->getValueAPF(); const APFloat &V2 = N2CFP->getValueAPF(); const APFloat &V3 = N3CFP->getValueAPF(); - APFloat::opStatus s = - V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); - if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp) - return getConstantFP(V1, DL, VT); + V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); + return getConstantFP(V1, DL, VT); } break; } @@ -5276,9 +5482,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; } case ISD::CONCAT_VECTORS: { - // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF. SDValue Ops[] = {N1, N2, N3}; - if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this)) + if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this)) return V; break; } @@ -5317,6 +5522,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; } case ISD::INSERT_SUBVECTOR: { + // Inserting undef into undef is still undef. 
+ if (N1.isUndef() && N2.isUndef()) + return getUNDEF(VT); SDValue Index = N3; if (VT.isSimple() && N1.getValueType().isSimple() && N2.getValueType().isSimple()) { @@ -5337,6 +5545,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // Trivial insertion. if (VT.getSimpleVT() == N2.getSimpleValueType()) return N2; + + // If this is an insert of an extracted vector into an undef vector, we + // can just use the input to the extract. + if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR && + N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT) + return N2.getOperand(0); } break; } @@ -5521,116 +5735,12 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) { SrcDelta + G->getOffset()); } -/// Determines the optimal series of memory ops to replace the memset / memcpy. -/// Return true if the number of memory ops is below the threshold (Limit). -/// It returns the types of the sequence of memory ops to perform -/// memset / memcpy by reference. -static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, - unsigned Limit, uint64_t Size, - unsigned DstAlign, unsigned SrcAlign, - bool IsMemset, - bool ZeroMemset, - bool MemcpyStrSrc, - bool AllowOverlap, - unsigned DstAS, unsigned SrcAS, - SelectionDAG &DAG, - const TargetLowering &TLI) { - assert((SrcAlign == 0 || SrcAlign >= DstAlign) && - "Expecting memcpy / memset source to meet alignment requirement!"); - // If 'SrcAlign' is zero, that means the memory operation does not need to - // load the value, i.e. memset or memcpy from constant string. Otherwise, - // it's the inferred alignment of the source. 'DstAlign', on the other hand, - // is the specified alignment of the memory operation. If it is zero, that - // means it's possible to change the alignment of the destination. - // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does - // not need to be loaded. - EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, - IsMemset, ZeroMemset, MemcpyStrSrc, - DAG.getMachineFunction()); - - if (VT == MVT::Other) { - // Use the largest integer type whose alignment constraints are satisfied. - // We only need to check DstAlign here as SrcAlign is always greater or - // equal to DstAlign (or zero). - VT = MVT::i64; - while (DstAlign && DstAlign < VT.getSizeInBits() / 8 && - !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) - VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); - assert(VT.isInteger()); - - // Find the largest legal integer type. - MVT LVT = MVT::i64; - while (!TLI.isTypeLegal(LVT)) - LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1); - assert(LVT.isInteger()); - - // If the type we've chosen is larger than the largest legal integer type - // then use that instead. - if (VT.bitsGT(LVT)) - VT = LVT; - } - - unsigned NumMemOps = 0; - while (Size != 0) { - unsigned VTSize = VT.getSizeInBits() / 8; - while (VTSize > Size) { - // For now, only use non-vector load / store's for the left-over pieces. - EVT NewVT = VT; - unsigned NewVTSize; - - bool Found = false; - if (VT.isVector() || VT.isFloatingPoint()) { - NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32; - if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) && - TLI.isSafeMemOpType(NewVT.getSimpleVT())) - Found = true; - else if (NewVT == MVT::i64 && - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) && - TLI.isSafeMemOpType(MVT::f64)) { - // i64 is usually not legal on 32-bit targets, but f64 may be. 
- NewVT = MVT::f64; - Found = true; - } - } - - if (!Found) { - do { - NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1); - if (NewVT == MVT::i8) - break; - } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT())); - } - NewVTSize = NewVT.getSizeInBits() / 8; - - // If the new VT cannot cover all of the remaining bits, then consider - // issuing a (or a pair of) unaligned and overlapping load / store. - bool Fast; - if (NumMemOps && AllowOverlap && NewVTSize < Size && - TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && - Fast) - VTSize = Size; - else { - VT = NewVT; - VTSize = NewVTSize; - } - } - - if (++NumMemOps > Limit) - return false; - - MemOps.push_back(VT); - Size -= VTSize; - } - - return true; -} - static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { // On Darwin, -Os means optimize for size without hurting performance, so // only really optimize for size when -Oz (MinSize) is used. if (MF.getTarget().getTargetTriple().isOSDarwin()) - return MF.getFunction().optForMinSize(); - return MF.getFunction().optForSize(); + return MF.getFunction().hasMinSize(); + return MF.getFunction().hasOptSize(); } static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, @@ -5665,6 +5775,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { // Turn a memcpy of undef to nop. + // FIXME: We need to honor volatile even is Src is undef. if (Src.isUndef()) return Chain; @@ -5691,13 +5802,12 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); - if (!FindOptimalMemOpLowering(MemOps, Limit, Size, - (DstAlignCanChange ? 0 : Align), - (isZeroConstant ? 0 : SrcAlign), - false, false, CopyFromConstant, true, - DstPtrInfo.getAddrSpace(), - SrcPtrInfo.getAddrSpace(), - DAG, TLI)) + if (!TLI.findOptimalMemOpLowering( + MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), + (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false, + /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant, + /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), + SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { @@ -5851,6 +5961,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { // Turn a memmove of undef to nop. + // FIXME: We need to honor volatile even is Src is undef. if (Src.isUndef()) return Chain; @@ -5871,13 +5982,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (Align > SrcAlign) SrcAlign = Align; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); - - if (!FindOptimalMemOpLowering(MemOps, Limit, Size, - (DstAlignCanChange ? 0 : Align), SrcAlign, - false, false, false, false, - DstPtrInfo.getAddrSpace(), - SrcPtrInfo.getAddrSpace(), - DAG, TLI)) + // FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in + // findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the + // correct code. + bool AllowOverlap = false; + if (!TLI.findOptimalMemOpLowering( + MemOps, Limit, Size, (DstAlignCanChange ? 
0 : Align), SrcAlign, + /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, + AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), + MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { @@ -5956,6 +6069,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, uint64_t Size, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo) { // Turn a memset of undef to nop. + // FIXME: We need to honor volatile even is Src is undef. if (Src.isUndef()) return Chain; @@ -5972,11 +6086,12 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, DstAlignCanChange = true; bool IsZeroVal = isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); - if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), - Size, (DstAlignCanChange ? 0 : Align), 0, - true, IsZeroVal, false, true, - DstPtrInfo.getAddrSpace(), ~0u, - DAG, TLI)) + if (!TLI.findOptimalMemOpLowering( + MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, + (DstAlignCanChange ? 0 : Align), 0, /*IsMemset=*/true, + /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false, + /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u, + MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { @@ -6097,9 +6212,11 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = getDataLayout().getIntPtrType(*getContext()); + Entry.Ty = Type::getInt8PtrTy(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); + + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in SDLoc TargetLowering::CallLoweringInfo CLI(*this); @@ -6199,9 +6316,11 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = getDataLayout().getIntPtrType(*getContext()); + Entry.Ty = Type::getInt8PtrTy(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); + + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in SDLoc TargetLowering::CallLoweringInfo CLI(*this); @@ -6294,16 +6413,15 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); // Emit a library call. 
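// (memset's C prototype is void *memset(void *dst, int c, size_t n), so the
// destination below is now typed i8* and only the length uses the target's
// intptr type, mirroring the memcpy/memmove libcall fixes above.)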
- Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Node = Dst; Entry.Ty = IntPtrTy; + Entry.Node = Dst; Entry.Ty = Type::getInt8PtrTy(*getContext()); Args.push_back(Entry); Entry.Node = Src; Entry.Ty = Src.getValueType().getTypeForEVT(*getContext()); Args.push_back(Entry); Entry.Node = Size; - Entry.Ty = IntPtrTy; + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); Args.push_back(Entry); // FIXME: pass in SDLoc @@ -6384,32 +6502,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, return SDValue(N, 0); } -SDValue SelectionDAG::getAtomicCmpSwap( - unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain, - SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, - unsigned Alignment, AtomicOrdering SuccessOrdering, - AtomicOrdering FailureOrdering, SyncScope::ID SSID) { - assert(Opcode == ISD::ATOMIC_CMP_SWAP || - Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); - assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); - - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getEVTAlignment(MemVT); - - MachineFunction &MF = getMachineFunction(); - - // FIXME: Volatile isn't really correct; we should keep track of atomic - // orderings in the memoperand. - auto Flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad | - MachineMemOperand::MOStore; - MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, - AAMDNodes(), nullptr, SSID, SuccessOrdering, - FailureOrdering); - - return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO); -} - SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, @@ -6424,35 +6516,6 @@ SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, - const Value *PtrVal, unsigned Alignment, - AtomicOrdering Ordering, - SyncScope::ID SSID) { - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getEVTAlignment(MemVT); - - MachineFunction &MF = getMachineFunction(); - // An atomic store does not load. An atomic load does not store. - // (An atomicrmw obviously both loads and stores.) - // For now, atomics are considered to be volatile always, and they are - // chained as such. - // FIXME: Volatile isn't really correct; we should keep track of atomic - // orderings in the memoperand. 
- auto Flags = MachineMemOperand::MOVolatile; - if (Opcode != ISD::ATOMIC_STORE) - Flags |= MachineMemOperand::MOLoad; - if (Opcode != ISD::ATOMIC_LOAD) - Flags |= MachineMemOperand::MOStore; - - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, - MemVT.getStoreSize(), Alignment, AAMDNodes(), - nullptr, SSID, Ordering); - - return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO); -} - -SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, - SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO) { assert((Opcode == ISD::ATOMIC_LOAD_ADD || Opcode == ISD::ATOMIC_LOAD_SUB || @@ -6465,6 +6528,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, Opcode == ISD::ATOMIC_LOAD_MAX || Opcode == ISD::ATOMIC_LOAD_UMIN || Opcode == ISD::ATOMIC_LOAD_UMAX || + Opcode == ISD::ATOMIC_LOAD_FADD || + Opcode == ISD::ATOMIC_LOAD_FSUB || Opcode == ISD::ATOMIC_SWAP || Opcode == ISD::ATOMIC_STORE) && "Invalid Atomic Op"); @@ -6502,7 +6567,7 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) { SDValue SelectionDAG::getMemIntrinsicNode( unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, - MachineMemOperand::Flags Flags, unsigned Size) { + MachineMemOperand::Flags Flags, unsigned Size, const AAMDNodes &AAInfo) { if (Align == 0) // Ensure that codegen never sees alignment 0 Align = getEVTAlignment(MemVT); @@ -6511,7 +6576,7 @@ SDValue SelectionDAG::getMemIntrinsicNode( MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrInfo, Flags, Size, Align); + MF.getMachineMemOperand(PtrInfo, Flags, Size, Align, AAInfo); return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO); } @@ -6557,6 +6622,36 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, return SDValue(N, 0); } +SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl, + SDValue Chain, int FrameIndex, + int64_t Size, int64_t Offset) { + const unsigned Opcode = IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END; + const auto VTs = getVTList(MVT::Other); + SDValue Ops[2] = { + Chain, + getFrameIndex(FrameIndex, + getTargetLoweringInfo().getFrameIndexTy(getDataLayout()), + true)}; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops); + ID.AddInteger(FrameIndex); + ID.AddInteger(Size); + ID.AddInteger(Offset); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) + return SDValue(E, 0); + + LifetimeSDNode *N = newSDNode<LifetimeSDNode>( + Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, Size, Offset); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + /// InferPointerInfo - If the specified ptr/offset is a frame index, infer a /// MachinePointerInfo record from it. 
This is particularly useful because the /// code generator has many cases where it doesn't bother passing in a @@ -6875,7 +6970,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ops[] = { Chain, Ptr, Mask, PassThru }; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops); - ID.AddInteger(VT.getRawBits()); + ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>( dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); @@ -6901,12 +6996,11 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, bool IsTruncating, bool IsCompressing) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - EVT VT = Val.getValueType(); SDVTList VTs = getVTList(MVT::Other); SDValue Ops[] = { Chain, Val, Ptr, Mask }; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops); - ID.AddInteger(VT.getRawBits()); + ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>( dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); @@ -7057,6 +7151,31 @@ SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) { return SDValue(); } +// TODO: Use fast-math-flags to enable more simplifications. +SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y) { + ConstantFPSDNode *YC = isConstOrConstSplatFP(Y, /* AllowUndefs */ true); + if (!YC) + return SDValue(); + + // X + -0.0 --> X + if (Opcode == ISD::FADD) + if (YC->getValueAPF().isNegZero()) + return X; + + // X - +0.0 --> X + if (Opcode == ISD::FSUB) + if (YC->getValueAPF().isPosZero()) + return X; + + // X * 1.0 --> X + // X / 1.0 --> X + if (Opcode == ISD::FMUL || Opcode == ISD::FDIV) + if (YC->getValueAPF().isExactlyValue(1.0)) + return X; + + return SDValue(); +} + SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue SV, unsigned Align) { SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) }; @@ -7098,8 +7217,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return V; break; case ISD::CONCAT_VECTORS: - // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF. 
- if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this)) + if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this)) return V; break; case ISD::SELECT_CC: @@ -7629,56 +7747,50 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { unsigned OrigOpc = Node->getOpcode(); unsigned NewOpc; - bool IsUnary = false; - bool IsTernary = false; switch (OrigOpc) { default: llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!"); - case ISD::STRICT_FADD: NewOpc = ISD::FADD; break; - case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break; - case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break; - case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break; - case ISD::STRICT_FREM: NewOpc = ISD::FREM; break; - case ISD::STRICT_FMA: NewOpc = ISD::FMA; IsTernary = true; break; - case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break; - case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break; - case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break; - case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; IsUnary = true; break; - case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; IsUnary = true; break; - case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; IsUnary = true; break; - case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; IsUnary = true; break; - case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; IsUnary = true; break; - case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; IsUnary = true; break; - case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; IsUnary = true; break; - case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; IsUnary = true; break; - case ISD::STRICT_FNEARBYINT: - NewOpc = ISD::FNEARBYINT; - IsUnary = true; - break; - case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break; - case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break; - case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; IsUnary = true; break; - case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; IsUnary = true; break; - case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; IsUnary = true; break; - case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; IsUnary = true; break; - } + case ISD::STRICT_FADD: NewOpc = ISD::FADD; break; + case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break; + case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break; + case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break; + case ISD::STRICT_FREM: NewOpc = ISD::FREM; break; + case ISD::STRICT_FMA: NewOpc = ISD::FMA; break; + case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; break; + case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break; + case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break; + case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; break; + case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; break; + case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; break; + case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; break; + case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break; + case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break; + case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break; + case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break; + case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break; + case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break; + case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break; + case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break; + case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break; + case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break; + case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break; + case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break; + case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break; + } + + assert(Node->getNumValues() == 2 && "Unexpected number of results!"); // We're 
taking this node out of the chain, so we need to re-link things. SDValue InputChain = Node->getOperand(0); SDValue OutputChain = SDValue(Node, 1); ReplaceAllUsesOfValueWith(OutputChain, InputChain); - SDVTList VTs = getVTList(Node->getOperand(1).getValueType()); - SDNode *Res = nullptr; - if (IsUnary) - Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) }); - else if (IsTernary) - Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1), - Node->getOperand(2), - Node->getOperand(3)}); - else - Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1), - Node->getOperand(2) }); + SmallVector<SDValue, 3> Ops; + for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) + Ops.push_back(Node->getOperand(i)); + + SDVTList VTs = getVTList(Node->getValueType(0)); + SDNode *Res = MorphNodeTo(Node, NewOpc, VTs, Ops); // MorphNodeTo can operate in two ways: if an existing node with the // specified operands exists, it can just return it. Otherwise, it @@ -7980,9 +8092,8 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { // DIExpression, we need to mark the expression with a // DW_OP_stack_value. auto *DIExpr = DV->getExpression(); - DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset, - DIExpression::NoDeref, - DIExpression::WithStackValue); + DIExpr = + DIExpression::prepend(DIExpr, DIExpression::StackValue, Offset); SDDbgValue *Clone = getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(), DV->isIndirect(), DV->getDebugLoc(), DV->getOrder()); @@ -8288,19 +8399,17 @@ void SelectionDAG::updateDivergence(SDNode * N) } } - -void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) { +void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) { DenseMap<SDNode *, unsigned> Degree; Order.reserve(AllNodes.size()); - for (auto & N : allnodes()) { + for (auto &N : allnodes()) { unsigned NOps = N.getNumOperands(); Degree[&N] = NOps; if (0 == NOps) Order.push_back(&N); } - for (std::vector<SDNode *>::iterator I = Order.begin(); - I!=Order.end();++I) { - SDNode * N = *I; + for (size_t I = 0; I != Order.size(); ++I) { + SDNode *N = Order[I]; for (auto U : N->uses()) { unsigned &UnsortedOps = Degree[U]; if (0 == --UnsortedOps) @@ -8310,9 +8419,8 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) { } #ifndef NDEBUG -void SelectionDAG::VerifyDAGDiverence() -{ - std::vector<SDNode*> TopoOrder; +void SelectionDAG::VerifyDAGDiverence() { + std::vector<SDNode *> TopoOrder; CreateTopologicalOrder(TopoOrder); const TargetLowering &TLI = getTargetLoweringInfo(); DenseMap<const SDNode *, bool> DivergenceMap; @@ -8338,7 +8446,6 @@ void SelectionDAG::VerifyDAGDiverence() } #endif - /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving /// uses of other values produced by From.getNode() alone. The same value /// may appear in both the From and To list. 
The Deleted vector is @@ -8584,14 +8691,24 @@ SDValue llvm::peekThroughOneUseBitcasts(SDValue V) { return V; } -bool llvm::isBitwiseNot(SDValue V) { +SDValue llvm::peekThroughExtractSubvectors(SDValue V) { + while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) + V = V.getOperand(0); + return V; +} + +bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) { if (V.getOpcode() != ISD::XOR) return false; - ConstantSDNode *C = isConstOrConstSplat(peekThroughBitcasts(V.getOperand(1))); - return C && C->isAllOnesValue(); + V = peekThroughBitcasts(V.getOperand(1)); + unsigned NumBits = V.getScalarValueSizeInBits(); + ConstantSDNode *C = + isConstOrConstSplat(V, AllowUndefs, /*AllowTruncation*/ true); + return C && (C->getAPIntValue().countTrailingOnes() >= NumBits); } -ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) { +ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs, + bool AllowTruncation) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) return CN; @@ -8599,10 +8716,39 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) { BitVector UndefElements; ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); - // BuildVectors can truncate their operands. Ignore that case here. - if (CN && (UndefElements.none() || AllowUndefs) && - CN->getValueType(0) == N.getValueType().getScalarType()) - return CN; + // BuildVectors can truncate their operands. Ignore that case here unless + // AllowTruncation is set. + if (CN && (UndefElements.none() || AllowUndefs)) { + EVT CVT = CN->getValueType(0); + EVT NSVT = N.getValueType().getScalarType(); + assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension"); + if (AllowTruncation || (CVT == NSVT)) + return CN; + } + } + + return nullptr; +} + +ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts, + bool AllowUndefs, + bool AllowTruncation) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) + return CN; + + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { + BitVector UndefElements; + ConstantSDNode *CN = BV->getConstantSplatNode(DemandedElts, &UndefElements); + + // BuildVectors can truncate their operands. Ignore that case here unless + // AllowTruncation is set. + if (CN && (UndefElements.none() || AllowUndefs)) { + EVT CVT = CN->getValueType(0); + EVT NSVT = N.getValueType().getScalarType(); + assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension"); + if (AllowTruncation || (CVT == NSVT)) + return CN; + } } return nullptr; @@ -8622,9 +8768,26 @@ ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) { return nullptr; } -bool llvm::isNullOrNullSplat(SDValue N) { +ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, + const APInt &DemandedElts, + bool AllowUndefs) { + if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) + return CN; + + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { + BitVector UndefElements; + ConstantFPSDNode *CN = + BV->getConstantFPSplatNode(DemandedElts, &UndefElements); + if (CN && (UndefElements.none() || AllowUndefs)) + return CN; + } + + return nullptr; +} + +bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) { // TODO: may want to use peekThroughBitcast() here. 
- ConstantSDNode *C = isConstOrConstSplat(N); + ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs); return C && C->isNullValue(); } @@ -8773,17 +8936,12 @@ bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) { /// isOperand - Return true if this node is an operand of N. bool SDValue::isOperandOf(const SDNode *N) const { - for (const SDValue &Op : N->op_values()) - if (*this == Op) - return true; - return false; + return any_of(N->op_values(), [this](SDValue Op) { return *this == Op; }); } bool SDNode::isOperandOf(const SDNode *N) const { - for (const SDValue &Op : N->op_values()) - if (this == Op.getNode()) - return true; - return false; + return any_of(N->op_values(), + [this](SDValue Op) { return this == Op.getNode(); }); } /// reachesChainWithoutSideEffects - Return true if this operand (which must @@ -8973,6 +9131,56 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { return getBuildVector(VecVT, dl, Scalars); } +std::pair<SDValue, SDValue> SelectionDAG::UnrollVectorOverflowOp( + SDNode *N, unsigned ResNE) { + unsigned Opcode = N->getOpcode(); + assert((Opcode == ISD::UADDO || Opcode == ISD::SADDO || + Opcode == ISD::USUBO || Opcode == ISD::SSUBO || + Opcode == ISD::UMULO || Opcode == ISD::SMULO) && + "Expected an overflow opcode"); + + EVT ResVT = N->getValueType(0); + EVT OvVT = N->getValueType(1); + EVT ResEltVT = ResVT.getVectorElementType(); + EVT OvEltVT = OvVT.getVectorElementType(); + SDLoc dl(N); + + // If ResNE is 0, fully unroll the vector op. + unsigned NE = ResVT.getVectorNumElements(); + if (ResNE == 0) + ResNE = NE; + else if (NE > ResNE) + NE = ResNE; + + SmallVector<SDValue, 8> LHSScalars; + SmallVector<SDValue, 8> RHSScalars; + ExtractVectorElements(N->getOperand(0), LHSScalars, 0, NE); + ExtractVectorElements(N->getOperand(1), RHSScalars, 0, NE); + + EVT SVT = TLI->getSetCCResultType(getDataLayout(), *getContext(), ResEltVT); + SDVTList VTs = getVTList(ResEltVT, SVT); + SmallVector<SDValue, 8> ResScalars; + SmallVector<SDValue, 8> OvScalars; + for (unsigned i = 0; i < NE; ++i) { + SDValue Res = getNode(Opcode, dl, VTs, LHSScalars[i], RHSScalars[i]); + SDValue Ov = + getSelect(dl, OvEltVT, Res.getValue(1), + getBoolConstant(true, dl, OvEltVT, ResVT), + getConstant(0, dl, OvEltVT)); + + ResScalars.push_back(Res); + OvScalars.push_back(Ov); + } + + ResScalars.append(ResNE - NE, getUNDEF(ResEltVT)); + OvScalars.append(ResNE - NE, getUNDEF(OvEltVT)); + + EVT NewResVT = EVT::getVectorVT(*getContext(), ResEltVT, ResNE); + EVT NewOvVT = EVT::getVectorVT(*getContext(), OvEltVT, ResNE); + return std::make_pair(getBuildVector(NewResVT, dl, ResScalars), + getBuildVector(NewOvVT, dl, OvScalars)); +} + bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, @@ -9014,7 +9222,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { // If this is a direct reference to a stack slot, use information about the // stack slot's alignment. 
- int FrameIdx = 1 << 31; + int FrameIdx = INT_MIN; int64_t FrameOffset = 0; if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) { FrameIdx = FI->getIndex(); @@ -9025,7 +9233,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { FrameOffset = Ptr.getConstantOperandVal(1); } - if (FrameIdx != (1 << 31)) { + if (FrameIdx != INT_MIN) { const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), FrameOffset); @@ -9065,6 +9273,15 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, return std::make_pair(Lo, Hi); } +/// Widen the vector up to the next power of two using INSERT_SUBVECTOR. +SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) { + EVT VT = N.getValueType(); + EVT WideVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(), + NextPowerOf2(VT.getVectorNumElements())); + return getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, getUNDEF(WideVT), N, + getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout()))); +} + void SelectionDAG::ExtractVectorElements(SDValue Op, SmallVectorImpl<SDValue> &Args, unsigned Start, unsigned Count) { @@ -9158,13 +9375,20 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef, return true; } -SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { +SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts, + BitVector *UndefElements) const { if (UndefElements) { UndefElements->clear(); UndefElements->resize(getNumOperands()); } + assert(getNumOperands() == DemandedElts.getBitWidth() && + "Unexpected vector size"); + if (!DemandedElts) + return SDValue(); SDValue Splatted; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (!DemandedElts[i]) + continue; SDValue Op = getOperand(i); if (Op.isUndef()) { if (UndefElements) @@ -9177,20 +9401,40 @@ SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { } if (!Splatted) { - assert(getOperand(0).isUndef() && + unsigned FirstDemandedIdx = DemandedElts.countTrailingZeros(); + assert(getOperand(FirstDemandedIdx).isUndef() && "Can only have a splat without a constant for all undefs."); - return getOperand(0); + return getOperand(FirstDemandedIdx); } return Splatted; } +SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { + APInt DemandedElts = APInt::getAllOnesValue(getNumOperands()); + return getSplatValue(DemandedElts, UndefElements); +} + +ConstantSDNode * +BuildVectorSDNode::getConstantSplatNode(const APInt &DemandedElts, + BitVector *UndefElements) const { + return dyn_cast_or_null<ConstantSDNode>( + getSplatValue(DemandedElts, UndefElements)); +} + ConstantSDNode * BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const { return dyn_cast_or_null<ConstantSDNode>(getSplatValue(UndefElements)); } ConstantFPSDNode * +BuildVectorSDNode::getConstantFPSplatNode(const APInt &DemandedElts, + BitVector *UndefElements) const { + return dyn_cast_or_null<ConstantFPSDNode>( + getSplatValue(DemandedElts, UndefElements)); +} + +ConstantFPSDNode * BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const { return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements)); } @@ -9228,7 +9472,10 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i) /* search */; - assert(i != e && "VECTOR_SHUFFLE node with all undef indices!"); + // If all elements are undefined, this 
shuffle can be considered a splat + // (although it should eventually get simplified away completely). + if (i == e) + return true; // Make sure all remaining elements are either undef or the same as the first // non-undef value. @@ -9266,8 +9513,7 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) { void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { assert(!Node->OperandList && "Node already has operands"); - assert(std::numeric_limits<decltype(SDNode::NumOperands)>::max() >= - Vals.size() && + assert(SDNode::getMaxNumOperands() >= Vals.size() && "too many operands to fit into SDNode"); SDUse *Ops = OperandRecycler.allocate( ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator); @@ -9287,6 +9533,19 @@ void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { checkForCycles(Node); } +SDValue SelectionDAG::getTokenFactor(const SDLoc &DL, + SmallVectorImpl<SDValue> &Vals) { + size_t Limit = SDNode::getMaxNumOperands(); + while (Vals.size() > Limit) { + unsigned SliceIdx = Vals.size() - Limit; + auto ExtractedTFs = ArrayRef<SDValue>(Vals).slice(SliceIdx, Limit); + SDValue NewTF = getNode(ISD::TokenFactor, DL, MVT::Other, ExtractedTFs); + Vals.erase(Vals.begin() + SliceIdx, Vals.end()); + Vals.emplace_back(NewTF); + } + return getNode(ISD::TokenFactor, DL, MVT::Other, Vals); +} + #ifndef NDEBUG static void checkForCyclesHelper(const SDNode *N, SmallPtrSetImpl<const SDNode*> &Visited, |