Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1298
1 file changed, 749 insertions, 549 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 313e07b5fdd6..592c09c10fb0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -38,6 +38,7 @@
 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -543,7 +544,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
   case ISD::ConstantPool:
   case ISD::TargetConstantPool: {
     const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
-    ID.AddInteger(CP->getAlignment());
+    ID.AddInteger(CP->getAlign().value());
     ID.AddInteger(CP->getOffset());
     if (CP->isMachineConstantPoolEntry())
       CP->getMachineCPVal()->addSelectionDAGCSEId(ID);
@@ -1000,12 +1001,12 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
   return Node;
 }
 
-unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
+Align SelectionDAG::getEVTAlign(EVT VT) const {
   Type *Ty = VT == MVT::iPTR ?
                    PointerType::get(Type::getInt8Ty(*getContext()), 0) :
                    VT.getTypeForEVT(*getContext());
-  return getDataLayout().getABITypeAlignment(Ty);
+  return getDataLayout().getABITypeAlign(Ty);
 }
 
 // EntryNode could meaningfully have debug info if we can find it...
@@ -1167,15 +1168,21 @@ SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT,
 }
 
 SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
-  assert(!VT.isVector() &&
-         "getZeroExtendInReg should use the vector element type instead of "
-         "the vector type!");
-  if (Op.getValueType().getScalarType() == VT) return Op;
-  unsigned BitWidth = Op.getScalarValueSizeInBits();
-  APInt Imm = APInt::getLowBitsSet(BitWidth,
-                                   VT.getSizeInBits());
-  return getNode(ISD::AND, DL, Op.getValueType(), Op,
-                 getConstant(Imm, DL, Op.getValueType()));
+  EVT OpVT = Op.getValueType();
+  assert(VT.isInteger() && OpVT.isInteger() &&
+         "Cannot getZeroExtendInReg FP types");
+  assert(VT.isVector() == OpVT.isVector() &&
+         "getZeroExtendInReg type should be vector iff the operand "
+         "type is vector!");
+  assert((!VT.isVector() ||
+          VT.getVectorElementCount() == OpVT.getVectorElementCount()) &&
+         "Vector element counts must match in getZeroExtendInReg");
+  assert(VT.bitsLE(OpVT) && "Not extending!");
+  if (OpVT == VT)
+    return Op;
+  APInt Imm = APInt::getLowBitsSet(OpVT.getScalarSizeInBits(),
+                                   VT.getScalarSizeInBits());
+  return getNode(ISD::AND, DL, OpVT, Op, getConstant(Imm, DL, OpVT));
 }
 
 SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
@@ -1332,10 +1339,16 @@ SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL,
 
 SDValue SelectionDAG::getShiftAmountConstant(uint64_t Val, EVT VT,
                                              const SDLoc &DL, bool LegalTypes) {
+  assert(VT.isInteger() && "Shift amount is not an integer type!");
   EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout(), LegalTypes);
   return getConstant(Val, DL, ShiftVT);
 }
 
+SDValue SelectionDAG::getVectorIdxConstant(uint64_t Val, const SDLoc &DL,
+                                           bool isTarget) {
+  return getConstant(Val, DL, TLI->getVectorIdxTy(getDataLayout()), isTarget);
+}
+
 SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT,
                                     bool isTarget) {
   return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget);
 }
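The getZeroExtendInReg rewrite above still lowers to an AND with a low-bit mask; the following standalone sketch (ours, not part of the patch) shows the APInt::getLowBitsSet arithmetic it relies on:

    // Sketch: zero-extending the low VTBits of an OpBits-wide value in place
    // is an AND with the mask (1 << VTBits) - 1.
    #include "llvm/ADT/APInt.h"
    #include <cassert>

    static void zextInRegMaskDemo() {
      llvm::APInt Imm = llvm::APInt::getLowBitsSet(/*numBits=*/32, /*loBitsSet=*/8);
      llvm::APInt Val(32, 0x12345678);
      assert((Val & Imm) == 0x78 && "only the low 8 bits survive");
    }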
@@ -1381,7 +1394,7 @@ SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT,
   else if (EltVT == MVT::f64)
     return getConstantFP(APFloat(Val), DL, VT, isTarget);
   else if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 ||
-           EltVT == MVT::f16) {
+           EltVT == MVT::f16 || EltVT == MVT::bf16) {
     bool Ignored;
     APFloat APF = APFloat(Val);
     APF.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
@@ -1459,19 +1472,18 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
 }
 
 SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
-                                      unsigned Alignment, int Offset,
-                                      bool isTarget,
-                                      unsigned TargetFlags) {
+                                      MaybeAlign Alignment, int Offset,
+                                      bool isTarget, unsigned TargetFlags) {
   assert((TargetFlags == 0 || isTarget) &&
          "Cannot set target flags on target-independent globals");
-  if (Alignment == 0)
+  if (!Alignment)
     Alignment = shouldOptForSize()
-                    ? getDataLayout().getABITypeAlignment(C->getType())
-                    : getDataLayout().getPrefTypeAlignment(C->getType());
+                    ? getDataLayout().getABITypeAlign(C->getType())
+                    : getDataLayout().getPrefTypeAlign(C->getType());
   unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, Opc, getVTList(VT), None);
-  ID.AddInteger(Alignment);
+  ID.AddInteger(Alignment->value());
   ID.AddInteger(Offset);
   ID.AddPointer(C);
   ID.AddInteger(TargetFlags);
@@ -1479,25 +1491,26 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
   if (SDNode *E = FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment,
+  auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, *Alignment,
                                           TargetFlags);
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V = SDValue(N, 0);
+  NewSDValueDbgMsg(V, "Creating new constant pool: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
-                                      unsigned Alignment, int Offset,
-                                      bool isTarget,
-                                      unsigned TargetFlags) {
+                                      MaybeAlign Alignment, int Offset,
+                                      bool isTarget, unsigned TargetFlags) {
   assert((TargetFlags == 0 || isTarget) &&
          "Cannot set target flags on target-independent globals");
-  if (Alignment == 0)
-    Alignment = getDataLayout().getPrefTypeAlignment(C->getType());
+  if (!Alignment)
+    Alignment = getDataLayout().getPrefTypeAlign(C->getType());
   unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, Opc, getVTList(VT), None);
-  ID.AddInteger(Alignment);
+  ID.AddInteger(Alignment->value());
   ID.AddInteger(Offset);
   C->addSelectionDAGCSEId(ID);
   ID.AddInteger(TargetFlags);
@@ -1505,7 +1518,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
   if (SDNode *E = FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment,
+  auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, *Alignment,
                                           TargetFlags);
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
@@ -1861,9 +1874,6 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
 }
 
 SDValue SelectionDAG::getSrcValue(const Value *V) {
-  assert((!V || V->getType()->isPointerTy()) &&
-         "SrcValue is not a pointer?");
-
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None);
   ID.AddPointer(V);
@@ -1921,6 +1931,10 @@ SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr,
   return SDValue(N, 0);
 }
 
+SDValue SelectionDAG::getFreeze(SDValue V) {
+  return getNode(ISD::FREEZE, SDLoc(V), V.getValueType(), V);
+}
+
 /// getShiftAmountOperand - Return the specified value casted to
 /// the target's desired shift amount type.
 SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
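Both getConstantPool overloads above use the MaybeAlign idiom: an unset alignment means "derive it from the DataLayout". A minimal sketch of that pattern (the helper name is illustrative, not from the patch):

    #include "llvm/Support/Alignment.h"

    // Sketch: resolve an optional requested alignment against a computed default.
    static llvm::Align resolveAlign(llvm::MaybeAlign Requested,
                                    llvm::Align Default) {
      return Requested ? *Requested : Default;
    }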
@@ -1979,28 +1993,54 @@ SDValue SelectionDAG::expandVACopy(SDNode *Node) {
                  MachinePointerInfo(VD));
 }
 
-SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
-  MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
-  unsigned ByteSize = VT.getStoreSize();
+Align SelectionDAG::getReducedAlign(EVT VT, bool UseABI) {
+  const DataLayout &DL = getDataLayout();
   Type *Ty = VT.getTypeForEVT(*getContext());
-  unsigned StackAlign =
-      std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign);
+  Align RedAlign = UseABI ? DL.getABITypeAlign(Ty) : DL.getPrefTypeAlign(Ty);
+
+  if (TLI->isTypeLegal(VT) || !VT.isVector())
+    return RedAlign;
+
+  const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+  const Align StackAlign = TFI->getStackAlign();
+
+  // See if we can choose a smaller ABI alignment in cases where it's an
+  // illegal vector type that will get broken down.
+  if (RedAlign > StackAlign) {
+    EVT IntermediateVT;
+    MVT RegisterVT;
+    unsigned NumIntermediates;
+    TLI->getVectorTypeBreakdown(*getContext(), VT, IntermediateVT,
+                                NumIntermediates, RegisterVT);
+    Ty = IntermediateVT.getTypeForEVT(*getContext());
+    Align RedAlign2 = UseABI ? DL.getABITypeAlign(Ty) : DL.getPrefTypeAlign(Ty);
+    if (RedAlign2 < RedAlign)
+      RedAlign = RedAlign2;
+  }
+
+  return RedAlign;
+}
 
-  int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
+SDValue SelectionDAG::CreateStackTemporary(TypeSize Bytes, Align Alignment) {
+  MachineFrameInfo &MFI = MF->getFrameInfo();
+  int FrameIdx = MFI.CreateStackObject(Bytes, Alignment, false);
   return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
 }
 
+SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
+  Type *Ty = VT.getTypeForEVT(*getContext());
+  Align StackAlign =
+      std::max(getDataLayout().getPrefTypeAlign(Ty), Align(minAlign));
+  return CreateStackTemporary(VT.getStoreSize(), StackAlign);
+}
+
 SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
-  unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize());
+  TypeSize Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize());
   Type *Ty1 = VT1.getTypeForEVT(*getContext());
   Type *Ty2 = VT2.getTypeForEVT(*getContext());
   const DataLayout &DL = getDataLayout();
-  unsigned Align =
-      std::max(DL.getPrefTypeAlignment(Ty1), DL.getPrefTypeAlignment(Ty2));
-
-  MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
-  int FrameIdx = MFI.CreateStackObject(Bytes, Align, false);
-  return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
+  Align Align = std::max(DL.getPrefTypeAlign(Ty1), DL.getPrefTypeAlign(Ty2));
+  return CreateStackTemporary(Bytes, Align);
 }
 
 SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
@@ -2179,21 +2219,16 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
                                       const APInt &DemandedElts) {
   switch (V.getOpcode()) {
   default:
+    return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts,
+                                                *this, 0);
    break;
   case ISD::Constant: {
-    auto *CV = cast<ConstantSDNode>(V.getNode());
-    assert(CV && "Const value should be ConstSDNode.");
-    const APInt &CVal = CV->getAPIntValue();
+    const APInt &CVal = cast<ConstantSDNode>(V)->getAPIntValue();
    APInt NewVal = CVal & DemandedBits;
    if (NewVal != CVal)
      return getConstant(NewVal, SDLoc(V), V.getValueType());
    break;
   }
-  case ISD::OR:
-  case ISD::XOR:
-  case ISD::SIGN_EXTEND_INREG:
-    return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts,
-                                                *this, 0);
   case ISD::SRL:
     // Only look at single-use SRLs.
     if (!V.getNode()->hasOneUse())
@@ -2224,19 +2259,6 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
     }
     break;
   }
-  case ISD::ANY_EXTEND: {
-    SDValue Src = V.getOperand(0);
-    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
-    // Being conservative here - only peek through if we only demand bits in the
-    // non-extended source (even though the extended bits are technically
-    // undef).
-    if (DemandedBits.getActiveBits() > SrcBitWidth)
-      break;
-    APInt SrcDemandedBits = DemandedBits.trunc(SrcBitWidth);
-    if (SDValue DemandedSrc = GetDemandedBits(Src, SrcDemandedBits))
-      return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc);
-    break;
-  }
   }
   return SDValue();
 }
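The CreateStackTemporary overloads above all funnel into the single TypeSize/Align version; a hedged sketch of the alignment clamp they share (helper name is ours):

    #include "llvm/IR/DataLayout.h"
    #include <algorithm>

    // Sketch: the caller's minimum alignment is raised to the type's preferred
    // alignment; llvm::Align is totally ordered, so std::max applies directly.
    static llvm::Align chooseStackTempAlign(const llvm::DataLayout &DL,
                                            llvm::Type *Ty, unsigned MinAlign) {
      return std::max(DL.getPrefTypeAlign(Ty), llvm::Align(MinAlign));
    }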
@@ -2253,11 +2275,7 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
 /// for bits that V cannot have.
 bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask,
                                      unsigned Depth) const {
-  EVT VT = V.getValueType();
-  APInt DemandedElts = VT.isVector()
-                           ? APInt::getAllOnesValue(VT.getVectorNumElements())
-                           : APInt(1, 1);
-  return MaskedValueIsZero(V, Mask, DemandedElts, Depth);
+  return Mask.isSubsetOf(computeKnownBits(V, Depth).Zero);
 }
 
 /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero in
@@ -2276,15 +2294,42 @@ bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask,
 }
 
 /// isSplatValue - Return true if the vector V has the same value
-/// across all DemandedElts.
+/// across all DemandedElts. For scalable vectors it does not make
+/// sense to specify which elements are demanded or undefined, therefore
+/// they are simply ignored.
 bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
                                 APInt &UndefElts) {
-  if (!DemandedElts)
-    return false; // No demanded elts, better to assume we don't know anything.
-
   EVT VT = V.getValueType();
   assert(VT.isVector() && "Vector type expected");
 
+  if (!VT.isScalableVector() && !DemandedElts)
+    return false; // No demanded elts, better to assume we don't know anything.
+
+  // Deal with some common cases here that work for both fixed and scalable
+  // vector types.
+  switch (V.getOpcode()) {
+  case ISD::SPLAT_VECTOR:
+    return true;
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::AND: {
+    APInt UndefLHS, UndefRHS;
+    SDValue LHS = V.getOperand(0);
+    SDValue RHS = V.getOperand(1);
+    if (isSplatValue(LHS, DemandedElts, UndefLHS) &&
+        isSplatValue(RHS, DemandedElts, UndefRHS)) {
+      UndefElts = UndefLHS | UndefRHS;
+      return true;
+    }
+    break;
+  }
+  }
+
+  // We don't support other cases than those above for scalable vectors at
+  // the moment.
+  if (VT.isScalableVector())
+    return false;
+
   unsigned NumElts = VT.getVectorNumElements();
   assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch");
   UndefElts = APInt::getNullValue(NumElts);
@@ -2326,30 +2371,14 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
     return true;
   }
   case ISD::EXTRACT_SUBVECTOR: {
+    // Offset the demanded elts by the subvector index.
     SDValue Src = V.getOperand(0);
-    ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(V.getOperand(1));
+    uint64_t Idx = V.getConstantOperandVal(1);
     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
-    if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
-      // Offset the demanded elts by the subvector index.
-      uint64_t Idx = SubIdx->getZExtValue();
-      APInt UndefSrcElts;
-      APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
-      if (isSplatValue(Src, DemandedSrc, UndefSrcElts)) {
-        UndefElts = UndefSrcElts.extractBits(NumElts, Idx);
-        return true;
-      }
-    }
-    break;
-  }
-  case ISD::ADD:
-  case ISD::SUB:
-  case ISD::AND: {
-    APInt UndefLHS, UndefRHS;
-    SDValue LHS = V.getOperand(0);
-    SDValue RHS = V.getOperand(1);
-    if (isSplatValue(LHS, DemandedElts, UndefLHS) &&
-        isSplatValue(RHS, DemandedElts, UndefRHS)) {
-      UndefElts = UndefLHS | UndefRHS;
+    APInt UndefSrcElts;
+    APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+    if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts)) {
+      UndefElts = UndefSrcElts.extractBits(NumElts, Idx);
       return true;
     }
     break;
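The one-line MaskedValueIsZero above reduces to a subset test against the known-zero mask; a self-contained sketch of the same check (ours, for illustration):

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/KnownBits.h"

    // Sketch: V & Mask == 0 is provable iff every bit set in Mask is a
    // known-zero bit of V.
    static bool maskedValueIsZero(const llvm::KnownBits &Known,
                                  const llvm::APInt &Mask) {
      return Mask.isSubsetOf(Known.Zero);
    }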
@@ -2363,10 +2392,13 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
 bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {
   EVT VT = V.getValueType();
   assert(VT.isVector() && "Vector type expected");
-  unsigned NumElts = VT.getVectorNumElements();
 
   APInt UndefElts;
-  APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+  APInt DemandedElts;
+
+  // For now we don't support this with scalable vectors.
+  if (!VT.isScalableVector())
+    DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
   return isSplatValue(V, DemandedElts, UndefElts) &&
          (AllowUndefs || !UndefElts);
 }
@@ -2379,19 +2411,35 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
   switch (Opcode) {
   default: {
     APInt UndefElts;
-    APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+    APInt DemandedElts;
+
+    if (!VT.isScalableVector())
+      DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+
     if (isSplatValue(V, DemandedElts, UndefElts)) {
-      // Handle case where all demanded elements are UNDEF.
-      if (DemandedElts.isSubsetOf(UndefElts)) {
+      if (VT.isScalableVector()) {
+        // DemandedElts and UndefElts are ignored for scalable vectors, since
+        // the only supported cases are SPLAT_VECTOR nodes.
         SplatIdx = 0;
-        return getUNDEF(VT);
+      } else {
+        // Handle case where all demanded elements are UNDEF.
+        if (DemandedElts.isSubsetOf(UndefElts)) {
+          SplatIdx = 0;
+          return getUNDEF(VT);
+        }
+        SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
       }
-      SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
       return V;
     }
     break;
   }
+  case ISD::SPLAT_VECTOR:
+    SplatIdx = 0;
+    return V;
   case ISD::VECTOR_SHUFFLE: {
+    if (VT.isScalableVector())
+      return SDValue();
+
     // Check if this is a shuffle node doing a splat.
     // TODO - remove this and rely purely on SelectionDAG::isSplatValue,
     // getTargetVShiftNode currently struggles without the splat source.
@@ -2413,14 +2461,16 @@ SDValue SelectionDAG::getSplatValue(SDValue V) {
   if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx))
     return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V),
                    SrcVector.getValueType().getScalarType(), SrcVector,
-                   getIntPtrConstant(SplatIdx, SDLoc(V)));
+                   getVectorIdxConstant(SplatIdx, SDLoc(V)));
   return SDValue();
 }
 
-/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
-/// is less than the element bit-width of the shift node, return it.
-static const APInt *getValidShiftAmountConstant(SDValue V,
-                                                const APInt &DemandedElts) {
+const APInt *
+SelectionDAG::getValidShiftAmountConstant(SDValue V,
+                                          const APInt &DemandedElts) const {
+  assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL ||
+          V.getOpcode() == ISD::SRA) &&
+         "Unknown shift node");
   unsigned BitWidth = V.getScalarValueSizeInBits();
   if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1), DemandedElts)) {
     // Shifting more than the bitwidth is not valid.
@@ -2431,10 +2481,13 @@ static const APInt *getValidShiftAmountConstant(SDValue V,
   return nullptr;
 }
 
-/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less
-/// than the element bit-width of the shift node, return the minimum value.
-static const APInt *
-getValidMinimumShiftAmountConstant(SDValue V, const APInt &DemandedElts) {
+const APInt *SelectionDAG::getValidMinimumShiftAmountConstant(
+    SDValue V, const APInt &DemandedElts) const {
+  assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL ||
+          V.getOpcode() == ISD::SRA) &&
+         "Unknown shift node");
+  if (const APInt *ValidAmt = getValidShiftAmountConstant(V, DemandedElts))
+    return ValidAmt;
   unsigned BitWidth = V.getScalarValueSizeInBits();
   auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1));
   if (!BV)
@@ -2457,10 +2510,13 @@ getValidMinimumShiftAmountConstant(SDValue V, const APInt &DemandedElts) {
   return MinShAmt;
 }
 
-/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less
-/// than the element bit-width of the shift node, return the maximum value.
-static const APInt *
-getValidMaximumShiftAmountConstant(SDValue V, const APInt &DemandedElts) {
+const APInt *SelectionDAG::getValidMaximumShiftAmountConstant(
+    SDValue V, const APInt &DemandedElts) const {
+  assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL ||
+          V.getOpcode() == ISD::SRA) &&
+         "Unknown shift node");
+  if (const APInt *ValidAmt = getValidShiftAmountConstant(V, DemandedElts))
+    return ValidAmt;
   unsigned BitWidth = V.getScalarValueSizeInBits();
   auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1));
   if (!BV)
@@ -2488,6 +2544,14 @@ getValidMaximumShiftAmountConstant(SDValue V, const APInt &DemandedElts) {
 /// every vector element.
 KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const {
   EVT VT = Op.getValueType();
+
+  // TODO: Until we have a plan for how to represent demanded elements for
+  // scalable vectors, we can just bail out for now.
+  if (Op.getValueType().isScalableVector()) {
+    unsigned BitWidth = Op.getScalarValueSizeInBits();
+    return KnownBits(BitWidth);
+  }
+
   APInt DemandedElts = VT.isVector()
                            ? APInt::getAllOnesValue(VT.getVectorNumElements())
                            : APInt(1, 1);
@@ -2503,6 +2567,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
 
   KnownBits Known(BitWidth);   // Don't know anything.
 
+  // TODO: Until we have a plan for how to represent demanded elements for
+  // scalable vectors, we can just bail out for now.
+  if (Op.getValueType().isScalableVector())
+    return Known;
+
   if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
     // We know all of the bits for a constant!
     Known.One = C->getAPIntValue();
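computeKnownBits now bails out conservatively for scalable vectors: a freshly constructed KnownBits means "nothing known". A tiny sketch of that invariant (ours, not from the patch):

    #include "llvm/Support/KnownBits.h"
    #include <cassert>

    static void unknownKnownBitsDemo() {
      llvm::KnownBits Known(32);  // Zero and One both start empty...
      assert(Known.isUnknown()); // ...which KnownBits reports as "unknown".
    }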
@@ -2622,52 +2691,40 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     break;
   }
   case ISD::INSERT_SUBVECTOR: {
-    // If we know the element index, demand any elements from the subvector and
-    // the remainder from the src its inserted into, otherwise demand them all.
+    // Demand any elements from the subvector and the remainder from the src its
+    // inserted into.
     SDValue Src = Op.getOperand(0);
     SDValue Sub = Op.getOperand(1);
-    ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+    uint64_t Idx = Op.getConstantOperandVal(2);
     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
-    if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) {
-      Known.One.setAllBits();
-      Known.Zero.setAllBits();
-      uint64_t Idx = SubIdx->getZExtValue();
-      APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
-      if (!!DemandedSubElts) {
-        Known = computeKnownBits(Sub, DemandedSubElts, Depth + 1);
-        if (Known.isUnknown())
-          break; // early-out.
-      }
-      APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
-      APInt DemandedSrcElts = DemandedElts & ~SubMask;
-      if (!!DemandedSrcElts) {
-        Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
-        Known.One &= Known2.One;
-        Known.Zero &= Known2.Zero;
-      }
-    } else {
-      Known = computeKnownBits(Sub, Depth + 1);
+    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+    APInt DemandedSrcElts = DemandedElts;
+    DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+
+    Known.One.setAllBits();
+    Known.Zero.setAllBits();
+    if (!!DemandedSubElts) {
+      Known = computeKnownBits(Sub, DemandedSubElts, Depth + 1);
       if (Known.isUnknown())
         break; // early-out.
-      Known2 = computeKnownBits(Src, Depth + 1);
+    }
+    if (!!DemandedSrcElts) {
+      Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
       Known.One &= Known2.One;
       Known.Zero &= Known2.Zero;
     }
     break;
   }
   case ISD::EXTRACT_SUBVECTOR: {
-    // If we know the element index, just demand that subvector elements,
-    // otherwise demand them all.
+    // Offset the demanded elts by the subvector index.
     SDValue Src = Op.getOperand(0);
-    ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+    // Bail until we can represent demanded elements for scalable vectors.
+    if (Src.getValueType().isScalableVector())
+      break;
+    uint64_t Idx = Op.getConstantOperandVal(1);
     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
-    APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts);
-    if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
-      // Offset the demanded elts by the subvector index.
-      uint64_t Idx = SubIdx->getZExtValue();
-      DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
-    }
-    Known = computeKnownBits(Src, DemandedSrc, Depth + 1);
+    APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+    Known = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
     break;
   }
   case ISD::SCALAR_TO_VECTOR: {
@@ -2753,35 +2810,23 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     break;
   }
   case ISD::AND:
-    // If either the LHS or the RHS are Zero, the result is zero.
     Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
     Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
 
-    // Output known-1 bits are only known if set in both the LHS & RHS.
-    Known.One &= Known2.One;
-    // Output known-0 are known to be clear if zero in either the LHS | RHS.
-    Known.Zero |= Known2.Zero;
+    Known &= Known2;
     break;
   case ISD::OR:
     Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
     Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
 
-    // Output known-0 bits are only known if clear in both the LHS & RHS.
-    Known.Zero &= Known2.Zero;
-    // Output known-1 are known to be set if set in either the LHS | RHS.
-    Known.One |= Known2.One;
+    Known |= Known2;
     break;
-  case ISD::XOR: {
+  case ISD::XOR:
     Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
     Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
 
-    // Output known-0 bits are known if clear or set in both the LHS & RHS.
-    APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
-    // Output known-1 are known to be set if set in only one of the LHS, RHS.
-    Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
-    Known.Zero = KnownZeroOut;
+    Known ^= Known2;
     break;
-  }
   case ISD::MUL: {
     Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
     Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
@@ -3075,12 +3120,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     EVT InVT = Op.getOperand(0).getValueType();
     APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
     Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
-    Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
+    Known = Known.zext(BitWidth);
     break;
   }
   case ISD::ZERO_EXTEND: {
     Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-    Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
+    Known = Known.zext(BitWidth);
     break;
   }
   case ISD::SIGN_EXTEND_VECTOR_INREG: {
@@ -3099,9 +3144,16 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     Known = Known.sext(BitWidth);
     break;
   }
+  case ISD::ANY_EXTEND_VECTOR_INREG: {
+    EVT InVT = Op.getOperand(0).getValueType();
+    APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
+    Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
+    Known = Known.anyext(BitWidth);
+    break;
+  }
   case ISD::ANY_EXTEND: {
-    Known = computeKnownBits(Op.getOperand(0), Depth+1);
-    Known = Known.zext(BitWidth, false /* ExtendedBitsAreKnownZero */);
+    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    Known = Known.anyext(BitWidth);
     break;
   }
   case ISD::TRUNCATE: {
@@ -3117,6 +3169,15 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     Known.One &= (~Known.Zero);
     break;
   }
+  case ISD::AssertAlign: {
+    unsigned LogOfAlign = Log2(cast<AssertAlignSDNode>(Op)->getAlign());
+    assert(LogOfAlign != 0);
+    // If a node is guaranteed to be aligned, set low zero bits accordingly as
+    // well as clearing one bits.
+    Known.Zero.setLowBits(LogOfAlign);
+    Known.One.clearLowBits(LogOfAlign);
+    break;
+  }
   case ISD::FGETSIGN:
     // All bits are zero except the low bit.
     Known.Zero.setBitsFrom(1);
     break;
@@ -3134,6 +3195,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     LLVM_FALLTHROUGH;
   case ISD::SUB:
   case ISD::SUBC: {
+    assert(Op.getResNo() == 0 &&
+           "We only compute knownbits for the difference here.");
+
     Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
     Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
     Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false,
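The AND/OR/XOR cases above now use KnownBits' compound operators; a hedged sketch of the identity the '&=' form encodes, written out with plain APInt members (helper name is ours, not the patch's):

    #include "llvm/Support/KnownBits.h"

    // Sketch: for AND, a result bit is known one only when known one on both
    // sides, and known zero when known zero on either side.
    static llvm::KnownBits knownBitsForAnd(llvm::KnownBits LHS,
                                           const llvm::KnownBits &RHS) {
      LHS.One &= RHS.One;
      LHS.Zero |= RHS.Zero;
      return LHS;
    }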
@@ -3245,57 +3309,51 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     EVT VecVT = InVec.getValueType();
     const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
     const unsigned NumSrcElts = VecVT.getVectorNumElements();
+
     // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
     // anything about the extended bits.
     if (BitWidth > EltBitWidth)
       Known = Known.trunc(EltBitWidth);
-    ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
-    if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) {
-      // If we know the element index, just demand that vector element.
-      unsigned Idx = ConstEltNo->getZExtValue();
-      APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
-      Known = computeKnownBits(InVec, DemandedElt, Depth + 1);
-    } else {
-      // Unknown element index, so ignore DemandedElts and demand them all.
-      Known = computeKnownBits(InVec, Depth + 1);
-    }
+
+    // If we know the element index, just demand that vector element, else for
+    // an unknown element index, ignore DemandedElts and demand them all.
+    APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+    auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+    if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
+      DemandedSrcElts =
+          APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());
+
+    Known = computeKnownBits(InVec, DemandedSrcElts, Depth + 1);
+
     if (BitWidth > EltBitWidth)
-      Known = Known.zext(BitWidth, false /* => any extend */);
+      Known = Known.anyext(BitWidth);
     break;
   }
   case ISD::INSERT_VECTOR_ELT: {
+    // If we know the element index, split the demand between the
+    // source vector and the inserted element, otherwise assume we need
+    // the original demanded vector elements and the value.
     SDValue InVec = Op.getOperand(0);
     SDValue InVal = Op.getOperand(1);
     SDValue EltNo = Op.getOperand(2);
-
-    ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
+    bool DemandedVal = true;
+    APInt DemandedVecElts = DemandedElts;
+    auto *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
     if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
-      // If we know the element index, split the demand between the
-      // source vector and the inserted element.
-      Known.Zero = Known.One = APInt::getAllOnesValue(BitWidth);
       unsigned EltIdx = CEltNo->getZExtValue();
-
-      // If we demand the inserted element then add its common known bits.
-      if (DemandedElts[EltIdx]) {
-        Known2 = computeKnownBits(InVal, Depth + 1);
-        Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
-        Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
-      }
-
-      // If we demand the source vector then add its common known bits, ensuring
-      // that we don't demand the inserted element.
-      APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx));
-      if (!!VectorElts) {
-        Known2 = computeKnownBits(InVec, VectorElts, Depth + 1);
-        Known.One &= Known2.One;
-        Known.Zero &= Known2.Zero;
-      }
-    } else {
-      // Unknown element index, so ignore DemandedElts and demand them all.
-      Known = computeKnownBits(InVec, Depth + 1);
+      DemandedVal = !!DemandedElts[EltIdx];
+      DemandedVecElts.clearBit(EltIdx);
+    }
+    Known.One.setAllBits();
+    Known.Zero.setAllBits();
+    if (DemandedVal) {
       Known2 = computeKnownBits(InVal, Depth + 1);
-      Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
-      Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
+      Known.One &= Known2.One.zextOrTrunc(BitWidth);
+      Known.Zero &= Known2.Zero.zextOrTrunc(BitWidth);
+    }
+    if (!!DemandedVecElts) {
+      Known2 = computeKnownBits(InVec, DemandedVecElts, Depth + 1);
+      Known.One &= Known2.One;
+      Known.Zero &= Known2.Zero;
    }
    break;
   }
@@ -3399,7 +3457,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
   }
   case ISD::FrameIndex:
   case ISD::TargetFrameIndex:
-    TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth);
+    TLI->computeKnownBitsForFrameIndex(cast<FrameIndexSDNode>(Op)->getIndex(),
+                                       Known, getMachineFunction());
    break;
 
   default:
@@ -3492,6 +3551,11 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
 
 unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
   EVT VT = Op.getValueType();
+
+  // TODO: Assume we don't know anything for now.
+  if (VT.isScalableVector())
+    return 1;
+
   APInt DemandedElts = VT.isVector()
                            ? APInt::getAllOnesValue(VT.getVectorNumElements())
                            : APInt(1, 1);
@@ -3515,7 +3579,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
   if (Depth >= MaxRecursionDepth)
     return 1;  // Limit search depth.
 
-  if (!DemandedElts)
+  if (!DemandedElts || VT.isScalableVector())
     return 1;  // No demanded elts, better to assume we don't know anything.
 
   unsigned Opcode = Op.getOpcode();
@@ -3535,7 +3599,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
         continue;
 
       SDValue SrcOp = Op.getOperand(i);
-      Tmp2 = ComputeNumSignBits(Op.getOperand(i), Depth + 1);
+      Tmp2 = ComputeNumSignBits(SrcOp, Depth + 1);
 
       // BUILD_VECTOR can implicitly truncate sources, we must handle this.
       if (SrcOp.getValueSizeInBits() != VTBits) {
@@ -3646,23 +3710,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
   case ISD::SRA:
     Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
     // SRA X, C -> adds C sign bits.
-    if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts))
-      Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits);
-    else if (const APInt *ShAmt =
-                 getValidMinimumShiftAmountConstant(Op, DemandedElts))
+    if (const APInt *ShAmt =
+            getValidMinimumShiftAmountConstant(Op, DemandedElts))
       Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits);
     return Tmp;
   case ISD::SHL:
-    if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
+    if (const APInt *ShAmt =
+            getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
       // shl destroys sign bits, ensure it doesn't shift out all sign bits.
       Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
       if (ShAmt->ult(Tmp))
         return Tmp - ShAmt->getZExtValue();
-    } else if (const APInt *ShAmt =
-                   getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
-      Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
-      if (ShAmt->ult(Tmp))
-        return Tmp - ShAmt->getZExtValue();
     }
     break;
   case ISD::AND:
@@ -3712,18 +3770,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     }
 
     // Fallback - just get the minimum number of sign bits of the operands.
-    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
     if (Tmp == 1)
       return 1;  // Early out.
-    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
     return std::min(Tmp, Tmp2);
   }
   case ISD::UMIN:
   case ISD::UMAX:
-    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
     if (Tmp == 1)
       return 1;  // Early out.
-    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
     return std::min(Tmp, Tmp2);
   case ISD::SADDO:
   case ISD::UADDO:
@@ -3753,7 +3811,14 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
   }
   case ISD::ROTL:
   case ISD::ROTR:
-    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+    // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
+    if (Tmp == VTBits)
+      return VTBits;
+
+    if (ConstantSDNode *C =
+            isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
       unsigned RotAmt = C->getAPIntValue().urem(VTBits);
 
       // Handle rotate right by N like a rotate left by 32-N.
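The SRA rule above folds the old exact-constant path into getValidMinimumShiftAmountConstant, which now checks the exact constant first itself. The arithmetic it relies on, as a hedged standalone sketch:

    #include <algorithm>
    #include <cstdint>

    // Sketch: an arithmetic right shift by ShAmt duplicates the sign bit, so
    // the count of known sign bits grows by ShAmt, clamped to the type width.
    static unsigned signBitsAfterSRA(unsigned KnownSignBits, uint64_t ShAmt,
                                     unsigned VTBits) {
      return std::min<uint64_t>(KnownSignBits + ShAmt, VTBits);
    }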
@@ -3762,7 +3827,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
 
       // If we aren't rotating out all of the known-in sign bits, return the
       // number that are left. This handles rotl(sext(x), 1) for example.
-      Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
       if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt);
     }
     break;
@@ -3770,13 +3834,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
   case ISD::ADDC:
     // Add can have at most one carry bit.  Thus we know that the output
     // is, at worst, one more bit than the inputs.
-    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
-    if (Tmp == 1) return 1; // Early out.
+    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    if (Tmp == 1) return 1; // Early out.
 
     // Special case decrementing a value (ADD X, -1):
-    if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+    if (ConstantSDNode *CRHS =
+            isConstOrConstSplat(Op.getOperand(1), DemandedElts))
       if (CRHS->isAllOnesValue()) {
-        KnownBits Known = computeKnownBits(Op.getOperand(0), Depth+1);
+        KnownBits Known =
+            computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
 
         // If the input is known to be 0 or 1, the output is 0/-1, which is all
         // sign bits set.
@@ -3789,18 +3855,19 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
         return Tmp;
       }
 
-    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
-    if (Tmp2 == 1) return 1;
-    return std::min(Tmp, Tmp2)-1;
-
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    if (Tmp2 == 1) return 1; // Early out.
+    return std::min(Tmp, Tmp2) - 1;
   case ISD::SUB:
-    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
-    if (Tmp2 == 1) return 1;
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    if (Tmp2 == 1) return 1; // Early out.
 
     // Handle NEG.
-    if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0)))
+    if (ConstantSDNode *CLHS =
+            isConstOrConstSplat(Op.getOperand(0), DemandedElts))
       if (CLHS->isNullValue()) {
-        KnownBits Known = computeKnownBits(Op.getOperand(1), Depth+1);
+        KnownBits Known =
+            computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
         // If the input is known to be 0 or 1, the output is 0/-1, which is all
         // sign bits set.
         if ((Known.Zero | 1).isAllOnesValue())
@@ -3816,9 +3883,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
 
     // Sub can have at most one carry bit.  Thus we know that the output
     // is, at worst, one more bit than the inputs.
-    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
-    if (Tmp == 1) return 1; // Early out.
-    return std::min(Tmp, Tmp2)-1;
+    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    if (Tmp == 1) return 1; // Early out.
+    return std::min(Tmp, Tmp2) - 1;
   case ISD::MUL: {
     // The output of the Mul can be at most twice the valid bits in the inputs.
     unsigned SignBitsOp0 = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
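The ADD X, -1 and NEG special cases above both rest on the same observation: a value known to be 0 or 1 becomes 0 or -1 after decrement or negation, so every bit is a sign bit. A hedged KnownBits sketch of that test (helper name is ours):

    #include "llvm/Support/KnownBits.h"

    // Sketch: Known.Zero covers every bit except possibly bit 0 exactly when
    // the value is 0 or 1; OR-ing in bit 0 then yields an all-ones mask.
    static bool isKnownZeroOrOne(const llvm::KnownBits &Known) {
      return (Known.Zero | 1).isAllOnesValue();
    }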
@@ -3853,39 +3920,32 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
   }
   case ISD::INSERT_VECTOR_ELT: {
+    // If we know the element index, split the demand between the
+    // source vector and the inserted element, otherwise assume we need
+    // the original demanded vector elements and the value.
     SDValue InVec = Op.getOperand(0);
     SDValue InVal = Op.getOperand(1);
     SDValue EltNo = Op.getOperand(2);
-
-    ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
+    bool DemandedVal = true;
+    APInt DemandedVecElts = DemandedElts;
+    auto *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
     if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
-      // If we know the element index, split the demand between the
-      // source vector and the inserted element.
       unsigned EltIdx = CEltNo->getZExtValue();
-
-      // If we demand the inserted element then get its sign bits.
-      Tmp = std::numeric_limits<unsigned>::max();
-      if (DemandedElts[EltIdx]) {
-        // TODO - handle implicit truncation of inserted elements.
-        if (InVal.getScalarValueSizeInBits() != VTBits)
-          break;
-        Tmp = ComputeNumSignBits(InVal, Depth + 1);
-      }
-
-      // If we demand the source vector then get its sign bits, and determine
-      // the minimum.
-      APInt VectorElts = DemandedElts;
-      VectorElts.clearBit(EltIdx);
-      if (!!VectorElts) {
-        Tmp2 = ComputeNumSignBits(InVec, VectorElts, Depth + 1);
-        Tmp = std::min(Tmp, Tmp2);
-      }
-    } else {
-      // Unknown element index, so ignore DemandedElts and demand them all.
-      Tmp = ComputeNumSignBits(InVec, Depth + 1);
+      DemandedVal = !!DemandedElts[EltIdx];
+      DemandedVecElts.clearBit(EltIdx);
+    }
+    Tmp = std::numeric_limits<unsigned>::max();
+    if (DemandedVal) {
+      // TODO - handle implicit truncation of inserted elements.
+      if (InVal.getScalarValueSizeInBits() != VTBits)
+        break;
       Tmp2 = ComputeNumSignBits(InVal, Depth + 1);
       Tmp = std::min(Tmp, Tmp2);
     }
+    if (!!DemandedVecElts) {
+      Tmp2 = ComputeNumSignBits(InVec, DemandedVecElts, Depth + 1);
+      Tmp = std::min(Tmp, Tmp2);
+    }
     assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
     return Tmp;
   }
@@ -3906,7 +3966,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     // If we know the element index, just demand that vector element, else for
     // an unknown element index, ignore DemandedElts and demand them all.
     APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
-    ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+    auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
     if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
       DemandedSrcElts =
           APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());
@@ -3914,18 +3974,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1);
   }
   case ISD::EXTRACT_SUBVECTOR: {
-    // If we know the element index, just demand that subvector elements,
-    // otherwise demand them all.
+    // Offset the demanded elts by the subvector index.
     SDValue Src = Op.getOperand(0);
-    ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+    // Bail until we can represent demanded elements for scalable vectors.
+    if (Src.getValueType().isScalableVector())
+      break;
+    uint64_t Idx = Op.getConstantOperandVal(1);
     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
-    APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts);
-    if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
-      // Offset the demanded elts by the subvector index.
-      uint64_t Idx = SubIdx->getZExtValue();
-      DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
-    }
-    return ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
+    APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+    return ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
   }
   case ISD::CONCAT_VECTORS: {
     // Determine the minimum number of sign bits across all demanded
@@ -3946,35 +4003,26 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     return Tmp;
   }
   case ISD::INSERT_SUBVECTOR: {
-    // If we know the element index, demand any elements from the subvector and
-    // the remainder from the src its inserted into, otherwise demand them all.
+    // Demand any elements from the subvector and the remainder from the src its
+    // inserted into.
     SDValue Src = Op.getOperand(0);
     SDValue Sub = Op.getOperand(1);
-    auto *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+    uint64_t Idx = Op.getConstantOperandVal(2);
     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
-    if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) {
-      Tmp = std::numeric_limits<unsigned>::max();
-      uint64_t Idx = SubIdx->getZExtValue();
-      APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
-      if (!!DemandedSubElts) {
-        Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1);
-        if (Tmp == 1) return 1; // early-out
-      }
-      APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
-      APInt DemandedSrcElts = DemandedElts & ~SubMask;
-      if (!!DemandedSrcElts) {
-        Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
-        Tmp = std::min(Tmp, Tmp2);
-      }
-      assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
-      return Tmp;
-    }
+    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+    APInt DemandedSrcElts = DemandedElts;
+    DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
 
-    // Not able to determine the index so just assume worst case.
-    Tmp = ComputeNumSignBits(Sub, Depth + 1);
-    if (Tmp == 1) return 1; // early-out
-    Tmp2 = ComputeNumSignBits(Src, Depth + 1);
-    Tmp = std::min(Tmp, Tmp2);
+    Tmp = std::numeric_limits<unsigned>::max();
+    if (!!DemandedSubElts) {
+      Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1);
+      if (Tmp == 1)
+        return 1; // early-out
+    }
+    if (!!DemandedSrcElts) {
+      Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
+      Tmp = std::min(Tmp, Tmp2);
+    }
     assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
     return Tmp;
   }
@@ -4052,13 +4100,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     return FirstAnswer;
   }
 
-  // Okay, we know that the sign bit in Mask is set.  Use CLZ to determine
+  // Okay, we know that the sign bit in Mask is set.  Use CLO to determine
   // the number of identical bits in the top of the input value.
-  Mask = ~Mask;
   Mask <<= Mask.getBitWidth()-VTBits;
-  // Return # leading zeros.  We use 'min' here in case Val was zero before
-  // shifting.  We don't want to return '64' as for an i32 "0".
-  return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
+  return std::max(FirstAnswer, Mask.countLeadingOnes());
 }
 
 bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
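The CLO change above drops the ~Mask flip: counting leading ones of the shifted mask directly gives the run of replicated sign bits. A small APInt sketch (values are ours, for illustration):

    #include "llvm/ADT/APInt.h"
    #include <cassert>

    static void countLeadingOnesDemo() {
      llvm::APInt Mask(8, 0b11100000);
      assert(Mask.countLeadingOnes() == 3);     // three replicated top bits
      assert((~Mask).countLeadingZeros() == 3); // the old CLZ-of-complement form
    }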
@@ -4109,6 +4154,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
   case ISD::FFLOOR:
   case ISD::FCEIL:
   case ISD::FROUND:
+  case ISD::FROUNDEVEN:
   case ISD::FRINT:
   case ISD::FNEARBYINT: {
     if (SNaN)
@@ -4249,6 +4295,8 @@ static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,
                                 SelectionDAG &DAG) {
   int NumOps = Ops.size();
   assert(NumOps != 0 && "Can't build an empty vector!");
+  assert(!VT.isScalableVector() &&
+         "BUILD_VECTOR cannot be used with scalable types");
   assert(VT.getVectorNumElements() == (unsigned)NumOps &&
          "Incorrect element count in BUILD_VECTOR!");
 
@@ -4287,8 +4335,8 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
            return Ops[0].getValueType() == Op.getValueType();
          }) &&
          "Concatenation of vectors with inconsistent value types!");
-  assert((Ops.size() * Ops[0].getValueType().getVectorNumElements()) ==
-             VT.getVectorNumElements() &&
+  assert((Ops[0].getValueType().getVectorElementCount() * Ops.size()) ==
+             VT.getVectorElementCount() &&
         "Incorrect element count in vector concatenation!");
 
   if (Ops.size() == 1)
@@ -4305,11 +4353,10 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
   bool IsIdentity = true;
   for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
     SDValue Op = Ops[i];
-    unsigned IdentityIndex = i * Op.getValueType().getVectorNumElements();
+    unsigned IdentityIndex = i * Op.getValueType().getVectorMinNumElements();
     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
         Op.getOperand(0).getValueType() != VT ||
         (IdentitySrc && Op.getOperand(0) != IdentitySrc) ||
-        !isa<ConstantSDNode>(Op.getOperand(1)) ||
         Op.getConstantOperandVal(1) != IdentityIndex) {
       IsIdentity = false;
       break;
@@ -4323,6 +4370,11 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
     return IdentitySrc;
   }
 
+  // The code below this point is only designed to work for fixed width
+  // vectors, so we bail out for now.
+  if (VT.isScalableVector())
+    return SDValue();
+
   // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be
   // simplified to one big BUILD_VECTOR.
   // FIXME: Add support for SCALAR_TO_VECTOR as well.
@@ -4508,7 +4560,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
         // FIXME need to be more flexible about rounding mode.
         (void)V.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven,
                         &Ignored);
-        return getConstant(V.bitcastToAPInt(), DL, VT);
+        return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
       }
     }
   }
@@ -4553,6 +4605,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
 
   unsigned OpOpcode = Operand.getNode()->getOpcode();
   switch (Opcode) {
+  case ISD::FREEZE:
+    assert(VT == Operand.getValueType() && "Unexpected VT!");
+    break;
   case ISD::TokenFactor:
   case ISD::MERGE_VALUES:
   case ISD::CONCAT_VECTORS:
@@ -4597,8 +4652,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
            "type is vector!");
     if (Operand.getValueType() == VT) return Operand;   // noop extension
     assert((!VT.isVector() ||
-            VT.getVectorNumElements() ==
-                Operand.getValueType().getVectorNumElements()) &&
+            VT.getVectorElementCount() ==
+                Operand.getValueType().getVectorElementCount()) &&
           "Vector element count mismatch!");
     assert(Operand.getValueType().bitsLT(VT) &&
            "Invalid sext node, dst < src!");
@@ -4616,8 +4671,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
            "type is vector!");
     if (Operand.getValueType() == VT) return Operand;   // noop extension
     assert((!VT.isVector() ||
-            VT.getVectorNumElements() ==
-                Operand.getValueType().getVectorNumElements()) &&
+            VT.getVectorElementCount() ==
+                Operand.getValueType().getVectorElementCount()) &&
           "Vector element count mismatch!");
     assert(Operand.getValueType().bitsLT(VT) &&
            "Invalid zext node, dst < src!");
@@ -4635,8 +4690,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
            "type is vector!");
     if (Operand.getValueType() == VT) return Operand;   // noop extension
     assert((!VT.isVector() ||
-            VT.getVectorNumElements() ==
-                Operand.getValueType().getVectorNumElements()) &&
+            VT.getVectorElementCount() ==
+                Operand.getValueType().getVectorElementCount()) &&
          "Vector element count mismatch!");
     assert(Operand.getValueType().bitsLT(VT) &&
            "Invalid anyext node, dst < src!");
@@ -4665,8 +4720,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
            "type is vector!");
     if (Operand.getValueType() == VT) return Operand;   // noop truncate
     assert((!VT.isVector() ||
-            VT.getVectorNumElements() ==
-                Operand.getValueType().getVectorNumElements()) &&
+            VT.getVectorElementCount() ==
+                Operand.getValueType().getVectorElementCount()) &&
          "Vector element count mismatch!");
     assert(Operand.getValueType().bitsGT(VT) &&
            "Invalid truncate node, src < dst!");
@@ -4753,6 +4808,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     if (OpOpcode == ISD::FNEG)  // abs(-X) -> abs(X)
       return getNode(ISD::FABS, DL, VT, Operand.getOperand(0));
     break;
+  case ISD::VSCALE:
+    assert(VT == Operand.getValueType() && "Unexpected VT!");
+    break;
   }
 
   SDNode *N;
@@ -4824,17 +4882,6 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
   return llvm::None;
 }
 
-SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
-                                             EVT VT, const ConstantSDNode *C1,
-                                             const ConstantSDNode *C2) {
-  if (C1->isOpaque() || C2->isOpaque())
-    return SDValue();
-  if (Optional<APInt> Folded =
-          FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue()))
-    return getConstant(Folded.getValue(), DL, VT);
-  return SDValue();
-}
-
 SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
                                        const GlobalAddressSDNode *GA,
                                        const SDNode *N2) {
@@ -4881,20 +4928,37 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
 }
 
 SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
-                                             EVT VT, SDNode *N1, SDNode *N2) {
+                                             EVT VT, ArrayRef<SDValue> Ops) {
   // If the opcode is a target-specific ISD node, there's nothing we can
   // do here and the operand rules may not line up with the below, so
   // bail early.
   if (Opcode >= ISD::BUILTIN_OP_END)
     return SDValue();
 
-  if (isUndef(Opcode, {SDValue(N1, 0), SDValue(N2, 0)}))
+  // For now, the array Ops should only contain two values.
+  // This enforcement will be removed once this function is merged with
+  // FoldConstantVectorArithmetic
+  if (Ops.size() != 2)
+    return SDValue();
+
+  if (isUndef(Opcode, Ops))
     return getUNDEF(VT);
 
+  SDNode *N1 = Ops[0].getNode();
+  SDNode *N2 = Ops[1].getNode();
+
   // Handle the case of two scalars.
   if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) {
     if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) {
-      SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, C1, C2);
+      if (C1->isOpaque() || C2->isOpaque())
+        return SDValue();
+
+      Optional<APInt> FoldAttempt =
+          FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());
+      if (!FoldAttempt)
+        return SDValue();
+
+      SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT);
       assert((!Folded || !VT.isVector()) &&
              "Can't fold vectors ops with scalar operands");
       return Folded;
@@ -4908,8 +4972,14 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2))
     return FoldSymbolOffset(Opcode, VT, GA, N1);
 
-  // For vectors, extract each constant element and fold them individually.
-  // Either input may be an undef value.
+  // TODO: All the folds below are performed lane-by-lane and assume a fixed
+  // vector width, however we should be able to do constant folds involving
+  // splat vector nodes too.
+  if (VT.isScalableVector())
+    return SDValue();
+
+  // For fixed width vectors, extract each constant element and fold them
+  // individually. Either input may be an undef value.
   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
   if (!BV1 && !N1->isUndef())
     return SDValue();
@@ -4985,6 +5055,13 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
   if (!VT.isVector())
     return SDValue();
 
+  // TODO: All the folds below are performed lane-by-lane and assume a fixed
+  // vector width, however we should be able to do constant folds involving
+  // splat vector nodes too.
+  if (VT.isScalableVector())
+    return SDValue();
+
+  // From this point onwards all vectors are assumed to be fixed width.
   unsigned NumElts = VT.getVectorNumElements();
 
   auto IsScalarOrSameVectorSize = [&](const SDValue &Op) {
@@ -5107,8 +5184,13 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
   }
 
   switch (Opcode) {
-  case ISD::FADD:
   case ISD::FSUB:
+    // -0.0 - undef --> undef (consistent with "fneg undef")
+    if (N1CFP && N1CFP->getValueAPF().isNegZero() && N2.isUndef())
+      return getUNDEF(VT);
+    LLVM_FALLTHROUGH;
+
+  case ISD::FADD:
   case ISD::FMUL:
   case ISD::FDIV:
   case ISD::FREM:
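The reworked FoldConstantArithmetic above takes an ArrayRef of operands instead of two ConstantSDNodes; a hedged sketch of the new call-site shape (the wrapper is ours, the signature is the patch's):

    #include "llvm/CodeGen/SelectionDAG.h"

    // Sketch: currently Ops must hold exactly two values, matching the
    // enforcement in the function body.
    llvm::SDValue tryFoldAdd(llvm::SelectionDAG &DAG, const llvm::SDLoc &DL,
                             llvm::EVT VT, llvm::SDValue N1, llvm::SDValue N2) {
      return DAG.FoldConstantArithmetic(llvm::ISD::ADD, DL, VT, {N1, N2});
    }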
@@ -5122,6 +5204,34 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
   return SDValue();
 }
 
+SDValue SelectionDAG::getAssertAlign(const SDLoc &DL, SDValue Val, Align A) {
+  assert(Val.getValueType().isInteger() && "Invalid AssertAlign!");
+
+  // There's no need to assert on a byte-aligned pointer. All pointers are at
+  // least byte aligned.
+  if (A == Align(1))
+    return Val;
+
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::AssertAlign, getVTList(Val.getValueType()), {Val});
+  ID.AddInteger(A.value());
+
+  void *IP = nullptr;
+  if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+    return SDValue(E, 0);
+
+  auto *N = newSDNode<AssertAlignSDNode>(DL.getIROrder(), DL.getDebugLoc(),
+                                         Val.getValueType(), A);
+  createOperands(N, {Val});
+
+  CSEMap.InsertNode(N, IP);
+  InsertNode(N);
+
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
+}
+
 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                               SDValue N1, SDValue N2, const SDNodeFlags Flags) {
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
@@ -5186,11 +5296,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     if (N2C && N2C->isNullValue())
       return N1;
     break;
+  case ISD::MUL:
+    assert(VT.isInteger() && "This operator does not apply to FP types!");
+    assert(N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
+      APInt MulImm = cast<ConstantSDNode>(N1->getOperand(0))->getAPIntValue();
+      APInt N2CImm = N2C->getAPIntValue();
+      return getVScale(DL, VT, MulImm * N2CImm);
+    }
+    break;
   case ISD::UDIV:
   case ISD::UREM:
   case ISD::MULHU:
   case ISD::MULHS:
-  case ISD::MUL:
   case ISD::SDIV:
   case ISD::SREM:
   case ISD::SMIN:
@@ -5213,7 +5332,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
     assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
-    if (SDValue V = simplifyFPBinop(Opcode, N1, N2))
+    if (SDValue V = simplifyFPBinop(Opcode, N1, N2, Flags))
      return V;
    break;
   case ISD::FCOPYSIGN:   // N1 and result must match.  N1/N2 need not match.
@@ -5223,6 +5342,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
           "Invalid FCOPYSIGN!");
    break;
   case ISD::SHL:
+    if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
+      APInt MulImm = cast<ConstantSDNode>(N1->getOperand(0))->getAPIntValue();
+      APInt ShiftImm = N2C->getAPIntValue();
+      return getVScale(DL, VT, MulImm << ShiftImm);
+    }
+    LLVM_FALLTHROUGH;
   case ISD::SRA:
   case ISD::SRL:
     if (SDValue V = simplifyShift(N1, N2))
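The two vscale folds above rewrite (vscale * C1) * C2 and (vscale * C1) << C2 into a single VSCALE node when the no-signed-wrap flag permits reassociation; the constant math is plain APInt arithmetic, sketched here with hypothetical helper names:

    #include "llvm/ADT/APInt.h"

    // Sketch: (vscale * MulImm) * C  ==  vscale * (MulImm * C)
    static llvm::APInt foldVScaleMul(const llvm::APInt &MulImm,
                                     const llvm::APInt &C) {
      return MulImm * C;
    }

    // Sketch: (vscale * MulImm) << ShAmt  ==  vscale * (MulImm << ShAmt)
    static llvm::APInt foldVScaleShl(const llvm::APInt &MulImm,
                                     unsigned ShAmt) {
      return MulImm.shl(ShAmt);
    }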
- assert(N2.getValueSizeInBits() >= Log2_32_Ceil(N1.getValueSizeInBits()) && + assert(N2.getValueType().getScalarSizeInBits().getFixedSize() >= + Log2_32_Ceil(VT.getScalarSizeInBits().getFixedSize()) && "Invalid use of small shift amount with oversized value!"); // Always fold shifts of i1 values so the code generator doesn't need to @@ -5281,7 +5407,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "SIGN_EXTEND_INREG type should be vector iff the operand " "type is vector!"); assert((!EVT.isVector() || - EVT.getVectorNumElements() == VT.getVectorNumElements()) && + EVT.getVectorElementCount() == VT.getVectorElementCount()) && "Vector element counts must match in SIGN_EXTEND_INREG"); assert(EVT.bitsLE(VT) && "Not extending!"); if (EVT == VT) return N1; // Not actually extending @@ -5323,27 +5449,36 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (N1.isUndef() || N2.isUndef()) return getUNDEF(VT); - // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF - if (N2C && N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements())) + // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF for fixed length + // vectors. For scalable vectors we will provide appropriate support for + // dealing with arbitrary indices. + if (N2C && N1.getValueType().isFixedLengthVector() && + N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements())) return getUNDEF(VT); // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is - // expanding copies of large vectors from registers. - if (N2C && - N1.getOpcode() == ISD::CONCAT_VECTORS && - N1.getNumOperands() > 0) { + // expanding copies of large vectors from registers. This only works for + // fixed length vectors, since we need to know the exact number of + // elements. + if (N2C && N1.getOperand(0).getValueType().isFixedLengthVector() && + N1.getOpcode() == ISD::CONCAT_VECTORS && N1.getNumOperands() > 0) { unsigned Factor = N1.getOperand(0).getValueType().getVectorNumElements(); return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(N2C->getZExtValue() / Factor), - getConstant(N2C->getZExtValue() % Factor, DL, - N2.getValueType())); + getVectorIdxConstant(N2C->getZExtValue() % Factor, DL)); } - // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is - // expanding large vector constants. - if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) { - SDValue Elt = N1.getOperand(N2C->getZExtValue()); + // EXTRACT_VECTOR_ELT of BUILD_VECTOR or SPLAT_VECTOR is often formed while + // lowering is expanding large vector constants. + if (N2C && (N1.getOpcode() == ISD::BUILD_VECTOR || + N1.getOpcode() == ISD::SPLAT_VECTOR)) { + assert((N1.getOpcode() != ISD::BUILD_VECTOR || + N1.getValueType().isFixedLengthVector()) && + "BUILD_VECTOR used for scalable vectors"); + unsigned Index = + N1.getOpcode() == ISD::BUILD_VECTOR ? N2C->getZExtValue() : 0; + SDValue Elt = N1.getOperand(Index); if (VT != Elt.getValueType()) // If the vector element type is not legal, the BUILD_VECTOR operands @@ -5377,8 +5512,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // EXTRACT_VECTOR_ELT of v1iX EXTRACT_SUBVECTOR could be formed // when vector types are scalarized and v1iX is legal. - // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx) + // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx). 
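The tightened shift assert above demands that the shift-amount type be at least ceil(log2(value bit width)) bits wide, so an i1024 shifted by an i8 amount is rejected. A quick standalone check of that rule (LLVM's Log2_32_Ceil is re-derived here):

    #include <cstdint>
    #include <iostream>

    // Smallest number of bits that can index every bit position of a
    // Width-bit value, i.e. ceil(log2(Width)).
    unsigned log2Ceil(uint32_t Width) {
      unsigned Bits = 0;
      while ((1u << Bits) < Width)
        ++Bits;
      return Bits;
    }

    int main() {
      // An i1024 value needs a shift amount of at least 10 bits, so an i8
      // shift amount would trip the assert; i64 only needs 6 bits.
      std::cout << log2Ceil(1024) << ' ' << log2Ceil(64) << '\n'; // 10 6
    }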
+ // Here we are completely ignoring the extract element index (N2), + // which is fine for fixed width vectors, since any index other than 0 + // is undefined anyway. However, this cannot be ignored for scalable + // vectors - in theory we could support this, but we don't want to do this + // without a profitability check. if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && + N1.getValueType().isFixedLengthVector() && N1.getValueType().getVectorNumElements() == 1) { return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N1.getOperand(1)); @@ -5406,50 +5547,48 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } break; case ISD::EXTRACT_SUBVECTOR: - if (VT.isSimple() && N1.getValueType().isSimple()) { - assert(VT.isVector() && N1.getValueType().isVector() && - "Extract subvector VTs must be a vectors!"); - assert(VT.getVectorElementType() == - N1.getValueType().getVectorElementType() && - "Extract subvector VTs must have the same element type!"); - assert(VT.getSimpleVT() <= N1.getSimpleValueType() && - "Extract subvector must be from larger vector to smaller vector!"); - - if (N2C) { - assert((VT.getVectorNumElements() + N2C->getZExtValue() - <= N1.getValueType().getVectorNumElements()) - && "Extract subvector overflow!"); - } - - // Trivial extraction. - if (VT.getSimpleVT() == N1.getSimpleValueType()) - return N1; - - // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF. - if (N1.isUndef()) - return getUNDEF(VT); + EVT N1VT = N1.getValueType(); + assert(VT.isVector() && N1VT.isVector() && + "Extract subvector VTs must be vectors!"); + assert(VT.getVectorElementType() == N1VT.getVectorElementType() && + "Extract subvector VTs must have the same element type!"); + assert((VT.isFixedLengthVector() || N1VT.isScalableVector()) && + "Cannot extract a scalable vector from a fixed length vector!"); + assert((VT.isScalableVector() != N1VT.isScalableVector() || + VT.getVectorMinNumElements() <= N1VT.getVectorMinNumElements()) && + "Extract subvector must be from larger vector to smaller vector!"); + assert(N2C && "Extract subvector index must be a constant"); + assert((VT.isScalableVector() != N1VT.isScalableVector() || + (VT.getVectorMinNumElements() + N2C->getZExtValue()) <= + N1VT.getVectorMinNumElements()) && + "Extract subvector overflow!"); + + // Trivial extraction. + if (VT == N1VT) + return N1; - // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of - // the concat have the same type as the extract. - if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS && - N1.getNumOperands() > 0 && - VT == N1.getOperand(0).getValueType()) { - unsigned Factor = VT.getVectorNumElements(); - return N1.getOperand(N2C->getZExtValue() / Factor); - } + // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF. + if (N1.isUndef()) + return getUNDEF(VT); - // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created - // during shuffle legalization. - if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) && - VT == N1.getOperand(1).getValueType()) - return N1.getOperand(1); + // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of + // the concat have the same type as the extract. + if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS && + N1.getNumOperands() > 0 && VT == N1.getOperand(0).getValueType()) { + unsigned Factor = VT.getVectorMinNumElements(); + return N1.getOperand(N2C->getZExtValue() / Factor); } + + // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created + // during shuffle legalization. 
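The rewritten EXTRACT_SUBVECTOR checks reason in terms of minimum element counts, since a scalable vector's real length is vscale times its minimum. A simplified model of the new bounds assert (the struct is illustrative):

    #include <cassert>
    #include <iostream>

    // Simplified vector-type descriptor: MinElts is the known minimum
    // element count; for scalable vectors the real count is a multiple.
    struct VecTy {
      unsigned MinElts;
      bool Scalable;
    };

    // Mirrors the new assert: when both types are fixed or both scalable,
    // extracting SubVT at index Idx from VT must stay within MinElts.
    bool extractInBounds(VecTy VT, VecTy SubVT, unsigned Idx) {
      if (VT.Scalable != SubVT.Scalable)
        return true; // mixed case: the bound can't be checked statically
      return SubVT.MinElts + Idx <= VT.MinElts;
    }

    int main() {
      assert(extractInBounds({8, false}, {4, false}, 4));  // v4 from v8 at 4
      assert(!extractInBounds({8, false}, {4, false}, 5)); // overflow
      assert(extractInBounds({8, true}, {4, true}, 4));    // nxv4 from nxv8
      std::cout << "bounds checks hold\n";
    }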
+ if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) && + VT == N1.getOperand(1).getValueType()) + return N1.getOperand(1); break; } // Perform trivial constant folding. - if (SDValue SV = - FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode())) + if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2})) return SV; if (SDValue V = foldConstantFPMath(Opcode, DL, VT, N1, N2)) @@ -5571,8 +5710,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "SETCC operands must have the same type!"); assert(VT.isVector() == N1.getValueType().isVector() && "SETCC type should be vector iff the operand type is vector!"); - assert((!VT.isVector() || - VT.getVectorNumElements() == N1.getValueType().getVectorNumElements()) && + assert((!VT.isVector() || VT.getVectorElementCount() == + N1.getValueType().getVectorElementCount()) && "SETCC vector element counts must match!"); // Use FoldSetCC to simplify SETCC's. if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL)) @@ -5594,8 +5733,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, llvm_unreachable("should use getVectorShuffle constructor!"); case ISD::INSERT_VECTOR_ELT: { ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3); - // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF - if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements()) + // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except + // for scalable vectors where we will generate appropriate code to + // deal with out-of-bounds cases correctly. + if (N3C && N1.getValueType().isFixedLengthVector() && + N3C->getZExtValue() >= N1.getValueType().getVectorNumElements()) return getUNDEF(VT); // Undefined index can be assumed out-of-bounds, so that's UNDEF too. @@ -5612,33 +5754,34 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // Inserting undef into undef is still undef. if (N1.isUndef() && N2.isUndef()) return getUNDEF(VT); - SDValue Index = N3; - if (VT.isSimple() && N1.getValueType().isSimple() - && N2.getValueType().isSimple()) { - assert(VT.isVector() && N1.getValueType().isVector() && - N2.getValueType().isVector() && - "Insert subvector VTs must be a vectors"); - assert(VT == N1.getValueType() && - "Dest and insert subvector source types must match!"); - assert(N2.getSimpleValueType() <= N1.getSimpleValueType() && - "Insert subvector must be from smaller vector to larger vector!"); - if (isa<ConstantSDNode>(Index)) { - assert((N2.getValueType().getVectorNumElements() + - cast<ConstantSDNode>(Index)->getZExtValue() - <= VT.getVectorNumElements()) - && "Insert subvector overflow!"); - } - // Trivial insertion. 
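As with extracts, the INSERT_VECTOR_ELT fold above now turns an out-of-bounds insert into UNDEF only for fixed-length vectors, where the element count is a compile-time constant. A tiny model of that decision:

    #include <iostream>

    // For a fixed-length vector, inserting past the end yields UNDEF; for
    // a scalable vector the true bound isn't known, so no fold is done.
    bool foldsToUndef(unsigned Idx, unsigned MinElts, bool Scalable) {
      return !Scalable && Idx >= MinElts;
    }

    int main() {
      std::cout << foldsToUndef(4, 4, false)   // 1: v4 index 4 is OOB
                << foldsToUndef(4, 4, true)    // 0: nxv4 may have more lanes
                << '\n';
    }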
- if (VT.getSimpleVT() == N2.getSimpleValueType()) - return N2; + EVT N2VT = N2.getValueType(); + assert(VT == N1.getValueType() && + "Dest and insert subvector source types must match!"); + assert(VT.isVector() && N2VT.isVector() && + "Insert subvector VTs must be vectors!"); + assert((VT.isScalableVector() || N2VT.isFixedLengthVector()) && + "Cannot insert a scalable vector into a fixed length vector!"); + assert((VT.isScalableVector() != N2VT.isScalableVector() || + VT.getVectorMinNumElements() >= N2VT.getVectorMinNumElements()) && + "Insert subvector must be from smaller vector to larger vector!"); + assert(isa<ConstantSDNode>(N3) && + "Insert subvector index must be constant"); + assert((VT.isScalableVector() != N2VT.isScalableVector() || + (N2VT.getVectorMinNumElements() + + cast<ConstantSDNode>(N3)->getZExtValue()) <= + VT.getVectorMinNumElements()) && + "Insert subvector overflow!"); + + // Trivial insertion. + if (VT == N2VT) + return N2; - // If this is an insert of an extracted vector into an undef vector, we - // can just use the input to the extract. - if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR && - N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT) - return N2.getOperand(0); - } + // If this is an insert of an extracted vector into an undef vector, we + // can just use the input to the extract. + if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR && + N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT) + return N2.getOperand(0); break; } case ISD::BITCAST: @@ -5867,7 +6010,7 @@ static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - uint64_t Size, unsigned Alignment, + uint64_t Size, Align Alignment, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { @@ -5891,37 +6034,38 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; - unsigned SrcAlign = DAG.InferPtrAlignment(Src); - if (Alignment > SrcAlign) + MaybeAlign SrcAlign = DAG.InferPtrAlign(Src); + if (!SrcAlign || Alignment > *SrcAlign) SrcAlign = Alignment; + assert(SrcAlign && "SrcAlign must be set"); ConstantDataArraySlice Slice; bool CopyFromConstant = isMemSrcFromConstant(Src, Slice); bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); - + const MemOp Op = isZeroConstant + ? MemOp::Set(Size, DstAlignCanChange, Alignment, + /*IsZeroMemset*/ true, isVol) + : MemOp::Copy(Size, DstAlignCanChange, Alignment, + *SrcAlign, isVol, CopyFromConstant); if (!TLI.findOptimalMemOpLowering( - MemOps, Limit, Size, (DstAlignCanChange ? 0 : Alignment), - (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false, - /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant, - /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), + MemOps, Limit, Op, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(C); - unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty); + Align NewAlign = DL.getABITypeAlign(Ty); // Don't promote to an alignment that would require dynamic stack // realignment. 
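In the memcpy expansion above, a copy whose source is a constant full of zeros is costed as a zero memset rather than a copy. A small model of that classification (MemOpDesc is a hypothetical stand-in for LLVM's MemOp descriptor):

    #include <cstdint>
    #include <iostream>

    // Hypothetical descriptor: a copy from an all-zero constant source is
    // treated like memset(dst, 0, Size) when choosing store sequences.
    struct MemOpDesc {
      bool IsZeroSet;
      uint64_t Size;
    };

    MemOpDesc classifyCopy(uint64_t Size, bool CopyFromConstant,
                           bool SourceIsAllZeros) {
      if (CopyFromConstant && SourceIsAllZeros)
        return {/*IsZeroSet=*/true, Size}; // costed as a zero memset
      return {/*IsZeroSet=*/false, Size};  // ordinary load/store copy
    }

    int main() {
      std::cout << classifyCopy(64, true, true).IsZeroSet << '\n'; // 1
    }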
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) - while (NewAlign > Alignment && - DL.exceedsNaturalStackAlignment(Align(NewAlign))) - NewAlign /= 2; + while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) + NewAlign = NewAlign / 2; if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. - if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) + if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) MFI.setObjectAlignment(FI->getIndex(), NewAlign); Alignment = NewAlign; } @@ -5968,7 +6112,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (Value.getNode()) { Store = DAG.getStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags); + DstPtrInfo.getWithOffset(DstOff), Alignment.value(), MMOFlags); OutChains.push_back(Store); } } @@ -5991,12 +6135,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), SrcPtrInfo.getWithOffset(SrcOff), VT, - MinAlign(SrcAlign, SrcOff), SrcMMOFlags); + commonAlignment(*SrcAlign, SrcOff).value(), + SrcMMOFlags); OutLoadChains.push_back(Value.getValue(1)); Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags); + DstPtrInfo.getWithOffset(DstOff), VT, Alignment.value(), MMOFlags); OutStoreChains.push_back(Store); } SrcOff += VTSize; @@ -6052,7 +6197,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - uint64_t Size, unsigned Align, + uint64_t Size, Align Alignment, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { @@ -6074,29 +6219,27 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; - unsigned SrcAlign = DAG.InferPtrAlignment(Src); - if (Align > SrcAlign) - SrcAlign = Align; + MaybeAlign SrcAlign = DAG.InferPtrAlign(Src); + if (!SrcAlign || Alignment > *SrcAlign) + SrcAlign = Alignment; + assert(SrcAlign && "SrcAlign must be set"); unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); - // FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in - // findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the - // correct code. - bool AllowOverlap = false; if (!TLI.findOptimalMemOpLowering( - MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), SrcAlign, - /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, - AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), + MemOps, Limit, + MemOp::Copy(Size, DstAlignCanChange, Alignment, *SrcAlign, + /*IsVolatile*/ true), + DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(C); - unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty); - if (NewAlign > Align) { + Align NewAlign = DL.getABITypeAlign(Ty); + if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. 
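The promotion loop above raises the destination's alignment to the widest store type's ABI alignment, but halves it back down while it would exceed the natural stack alignment on targets that cannot realign the stack. The same logic over plain integers:

    #include <cstdint>
    #include <iostream>

    // Model of the loop: prefer ABIAlign, but without dynamic stack
    // realignment never exceed the natural stack alignment.
    uint64_t promoteAlign(uint64_t CurAlign, uint64_t ABIAlign,
                          uint64_t NaturalStackAlign, bool CanRealign) {
      uint64_t NewAlign = ABIAlign;
      if (!CanRealign)
        while (NewAlign > CurAlign && NewAlign > NaturalStackAlign)
          NewAlign /= 2; // stay within what the stack guarantees
      return NewAlign > CurAlign ? NewAlign : CurAlign;
    }

    int main() {
      // A 16-byte ABI alignment is clamped by an 8-byte natural stack.
      std::cout << promoteAlign(4, 16, 8, false) << ' '  // 8
                << promoteAlign(4, 16, 8, true) << '\n'; // 16: realign OK
    }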
- if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) + if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) MFI.setObjectAlignment(FI->getIndex(), NewAlign); - Align = NewAlign; + Alignment = NewAlign; } } @@ -6118,9 +6261,9 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (isDereferenceable) SrcMMOFlags |= MachineMemOperand::MODereferenceable; - Value = - DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), - SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags); + Value = DAG.getLoad( + VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), + SrcPtrInfo.getWithOffset(SrcOff), SrcAlign->value(), SrcMMOFlags); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -6132,9 +6275,9 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, unsigned VTSize = VT.getSizeInBits() / 8; SDValue Store; - Store = DAG.getStore(Chain, dl, LoadValues[i], - DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags); + Store = DAG.getStore( + Chain, dl, LoadValues[i], DAG.getMemBasePlusOffset(Dst, DstOff, dl), + DstPtrInfo.getWithOffset(DstOff), Alignment.value(), MMOFlags); OutChains.push_back(Store); DstOff += VTSize; } @@ -6151,7 +6294,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, /// \param Dst Pointer to destination memory location. /// \param Src Value of byte to write into the memory. /// \param Size Number of bytes to write. -/// \param Align Alignment of the destination in bytes. +/// \param Alignment Alignment of the destination in bytes. /// \param isVol True if destination is volatile. /// \param DstPtrInfo IR information on the memory pointer. /// \returns New head in the control flow, if lowering was successful, empty @@ -6162,7 +6305,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, /// memory size. static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - uint64_t Size, unsigned Align, bool isVol, + uint64_t Size, Align Alignment, bool isVol, MachinePointerInfo DstPtrInfo) { // Turn a memset of undef to nop. // FIXME: We need to honor volatile even if Src is undef. if (Src.isUndef()) return Chain; @@ -6183,21 +6326,19 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, bool IsZeroVal = isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); if (!TLI.findOptimalMemOpLowering( - MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, - (DstAlignCanChange ? 0 : Align), 0, /*IsMemset=*/true, - /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false, - /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u, - MF.getFunction().getAttributes())) + MemOps, TLI.getMaxStoresPerMemset(OptSize), + MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol), + DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); - if (NewAlign > Align) { + Align NewAlign = DAG.getDataLayout().getABITypeAlign(Ty); + if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed.
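Note that the memmove expansion above issues every load before any store, so overlapping source and destination ranges behave correctly without the overlap analysis memcpy uses. A byte-level model of that schedule:

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Model of the memmove lowering: materialize all loads first, then
    // perform the stores, so an overlapping move can't clobber its source.
    void moveBytes(char *Dst, const char *Src, std::size_t N) {
      std::vector<char> Loaded(Src, Src + N); // all loads up front
      for (std::size_t I = 0; I != N; ++I)    // then all stores
        Dst[I] = Loaded[I];
    }

    int main() {
      char Buf[] = "abcdef";
      moveBytes(Buf + 2, Buf, 4); // overlapping move
      std::cout << Buf << '\n';   // ababcd
    }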
- if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) + if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) MFI.setObjectAlignment(FI->getIndex(), NewAlign); - Align = NewAlign; + Alignment = NewAlign; } } @@ -6235,7 +6376,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, assert(Value.getValueType() == VT && "Value with wrong type."); SDValue Store = DAG.getStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Align, + DstPtrInfo.getWithOffset(DstOff), Alignment.value(), isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone); OutChains.push_back(Store); DstOff += VT.getSizeInBits() / 8; @@ -6256,12 +6397,10 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, } SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, - SDValue Src, SDValue Size, unsigned Align, + SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { - assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); - // Check to see if we should lower the memcpy to loads and stores first. // For cases within the target-specified limits, this is the best choice. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -6270,9 +6409,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, if (ConstantSize->isNullValue()) return Chain; - SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, - ConstantSize->getZExtValue(),Align, - isVol, false, DstPtrInfo, SrcPtrInfo); + SDValue Result = getMemcpyLoadsAndStores( + *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, + isVol, false, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; } @@ -6281,7 +6420,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, // code. If the target chooses to do this, this is the next best. if (TSI) { SDValue Result = TSI->EmitTargetCodeForMemcpy( - *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline, + *this, dl, Chain, Dst, Src, Size, Alignment, isVol, AlwaysInline, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; @@ -6292,8 +6431,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, if (AlwaysInline) { assert(ConstantSize && "AlwaysInline requires a constant size!"); return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, - ConstantSize->getZExtValue(), Align, isVol, - true, DstPtrInfo, SrcPtrInfo); + ConstantSize->getZExtValue(), Alignment, + isVol, true, DstPtrInfo, SrcPtrInfo); } checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); @@ -6372,12 +6511,10 @@ SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl, } SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, - SDValue Src, SDValue Size, unsigned Align, + SDValue Src, SDValue Size, Align Alignment, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { - assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); - // Check to see if we should lower the memmove to loads and stores first. // For cases within the target-specified limits, this is the best choice. 
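The memset store loop above writes a Value that already holds the fill byte replicated to the chosen store width; that replication happens in a helper outside this hunk, so here is only a sketch of the splat computation for an assumed 8-byte store type:

    #include <cstdint>
    #include <iostream>

    // Replicate a fill byte across a 64-bit store value, as a memset
    // lowering does when it widens byte stores.
    uint64_t splatByte(uint8_t V) {
      return static_cast<uint64_t>(V) * 0x0101010101010101ULL;
    }

    int main() {
      std::cout << std::hex << splatByte(0xAB) << '\n'; // abababababababab
    }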
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -6386,10 +6523,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, if (ConstantSize->isNullValue()) return Chain; - SDValue Result = - getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src, - ConstantSize->getZExtValue(), Align, isVol, - false, DstPtrInfo, SrcPtrInfo); + SDValue Result = getMemmoveLoadsAndStores( + *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, + isVol, false, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; } @@ -6397,8 +6533,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, // Then check to see if we should lower the memmove with target-specific // code. If the target chooses to do this, this is the next best. if (TSI) { - SDValue Result = TSI->EmitTargetCodeForMemmove( - *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo); + SDValue Result = + TSI->EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, + Alignment, isVol, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; } @@ -6476,11 +6613,9 @@ SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, } SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, - SDValue Src, SDValue Size, unsigned Align, + SDValue Src, SDValue Size, Align Alignment, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo) { - assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); - // Check to see if we should lower the memset to stores first. // For cases within the target-specified limits, this is the best choice. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -6489,9 +6624,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, if (ConstantSize->isNullValue()) return Chain; - SDValue Result = - getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), - Align, isVol, DstPtrInfo); + SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), Alignment, + isVol, DstPtrInfo); if (Result.getNode()) return Result; @@ -6501,7 +6636,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, // code. If the target chooses to do this, this is the next best. 
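getMemmove, like getMemcpy and getMemset, tries three strategies in order: inline loads and stores for small constant sizes, then a target-specific expansion, then a libcall. A sketch of that dispatch shape, with placeholder hooks standing in for the real lowering attempts:

    #include <cstdint>
    #include <iostream>
    #include <optional>
    #include <string>

    // Placeholder attempts; an empty optional mirrors returning a null
    // SDValue() to signal "couldn't lower".
    std::optional<std::string> tryInline(uint64_t Size) {
      if (Size <= 8)
        return "inline loads/stores"; // small constant size
      return std::nullopt;
    }
    std::optional<std::string> tryTargetHook() { return std::nullopt; }

    std::string lowerMemmove(uint64_t Size) {
      if (auto R = tryInline(Size)) return *R;
      if (auto R = tryTargetHook()) return *R;
      return "libcall"; // final fallback: call memmove()
    }

    int main() {
      std::cout << lowerMemmove(4) << " / " << lowerMemmove(4096) << '\n';
      // inline loads/stores / libcall
    }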
if (TSI) { SDValue Result = TSI->EmitTargetCodeForMemset( - *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo); + *this, dl, Chain, Dst, Src, Size, Alignment, isVol, DstPtrInfo); if (Result.getNode()) return Result; } @@ -6662,11 +6797,8 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) { SDValue SelectionDAG::getMemIntrinsicNode( unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, - EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, + EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags, uint64_t Size, const AAMDNodes &AAInfo) { - if (Align == 0) // Ensure that codegen never sees alignment 0 - Align = getEVTAlignment(MemVT); - if (!Size && MemVT.isScalableVector()) Size = MemoryLocation::UnknownSize; else if (!Size) @@ -6674,7 +6806,7 @@ SDValue SelectionDAG::getMemIntrinsicNode( MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrInfo, Flags, Size, Align, AAInfo); + MF.getMachineMemOperand(PtrInfo, Flags, Size, Alignment, AAInfo); return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO); } @@ -6686,8 +6818,6 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, assert((Opcode == ISD::INTRINSIC_VOID || Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::PREFETCH || - Opcode == ISD::LIFETIME_START || - Opcode == ISD::LIFETIME_END || ((int)Opcode <= std::numeric_limits<int>::max() && (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && "Opcode is not a memory-accessing opcode!"); @@ -6795,13 +6925,11 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo, EVT MemVT, - unsigned Alignment, + Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getEVTAlignment(MemVT); MMOFlags |= MachineMemOperand::MOLoad; assert((MMOFlags & MachineMemOperand::MOStore) == 0); @@ -6810,9 +6938,10 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, if (PtrInfo.V.isNull()) PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); + uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize()); MachineFunction &MF = getMachineFunction(); - MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MMOFlags, MemVT.getStoreSize(), Alignment, AAInfo, Ranges); + MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, + Alignment, AAInfo, Ranges); return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } @@ -6867,7 +6996,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, - unsigned Alignment, + MaybeAlign Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges) { SDValue Undef = getUNDEF(Ptr.getValueType()); @@ -6885,7 +7014,7 @@ SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, - unsigned Alignment, + MaybeAlign Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo) { SDValue Undef = 
getUNDEF(Ptr.getValueType()); @@ -6918,12 +7047,10 @@ SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, - unsigned Alignment, + Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getEVTAlignment(Val.getValueType()); MMOFlags |= MachineMemOperand::MOStore; assert((MMOFlags & MachineMemOperand::MOLoad) == 0); @@ -6932,8 +7059,10 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); MachineFunction &MF = getMachineFunction(); - MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MMOFlags, Val.getValueType().getStoreSize(), Alignment, AAInfo); + uint64_t Size = + MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize()); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); return getStore(Chain, dl, Val, Ptr, MMO); } @@ -6969,13 +7098,11 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, - EVT SVT, unsigned Alignment, + EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getEVTAlignment(SVT); MMOFlags |= MachineMemOperand::MOStore; assert((MMOFlags & MachineMemOperand::MOLoad) == 0); @@ -7288,9 +7415,24 @@ SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) { return SDValue(); } -// TODO: Use fast-math-flags to enable more simplifications. -SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y) { +SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, + SDNodeFlags Flags) { + // If this operation has 'nnan' or 'ninf' and at least 1 disallowed operand + // (an undef operand can be chosen to be Nan/Inf), then the result of this + // operation is poison. That result can be relaxed to undef. 
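Several entry points above now take MaybeAlign (or Align) instead of an unsigned where 0 meant "unspecified": the deleted 'if (Alignment == 0)' fallbacks become unnecessary once an absent alignment is explicit in the type and resolved to the type's ABI alignment. The pattern in miniature:

    #include <cstdint>
    #include <iostream>
    #include <optional>

    // Model of the MaybeAlign pattern: the caller either supplies an
    // alignment or passes nothing, and the callee falls back to the ABI
    // alignment instead of overloading 0 as "unknown".
    uint64_t chooseAlign(std::optional<uint64_t> Requested,
                         uint64_t ABIAlign) {
      return Requested.value_or(ABIAlign);
    }

    int main() {
      std::cout << chooseAlign({}, 4) << ' '   // 4: fall back to ABI
                << chooseAlign(16, 4) << '\n'; // 16: explicit request wins
    }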
+ ConstantFPSDNode *XC = isConstOrConstSplatFP(X, /* AllowUndefs */ true); ConstantFPSDNode *YC = isConstOrConstSplatFP(Y, /* AllowUndefs */ true); + bool HasNan = (XC && XC->getValueAPF().isNaN()) || + (YC && YC->getValueAPF().isNaN()); + bool HasInf = (XC && XC->getValueAPF().isInfinity()) || + (YC && YC->getValueAPF().isInfinity()); + + if (Flags.hasNoNaNs() && (HasNan || X.isUndef() || Y.isUndef())) + return getUNDEF(X.getValueType()); + + if (Flags.hasNoInfs() && (HasInf || X.isUndef() || Y.isUndef())) + return getUNDEF(X.getValueType()); + if (!YC) return SDValue(); @@ -7394,6 +7536,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, createOperands(N, Ops); } + N->setFlags(Flags); InsertNode(N); SDValue V(N, 0); NewSDValueDbgMsg(V, "Creating new node: ", this); @@ -7406,7 +7549,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, - ArrayRef<SDValue> Ops) { + ArrayRef<SDValue> Ops, const SDNodeFlags Flags) { if (VTList.NumVTs == 1) return getNode(Opcode, DL, VTList.VTs[0], Ops); @@ -7481,6 +7624,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList); createOperands(N, Ops); } + + N->setFlags(Flags); InsertNode(N); SDValue V(N, 0); NewSDValueDbgMsg(V, "Creating new node: ", this); @@ -7919,7 +8064,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { switch (OrigOpc) { default: llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!"); -#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ +#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: NewOpc = ISD::DAGN; break; #define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: NewOpc = ISD::SETCC; break; @@ -9196,9 +9341,8 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, if (!TLI->isExtractSubvectorCheap(SubVT, OpVT, 0)) return SDValue(); BinOp = (ISD::NodeType)CandidateBinOp; - return getNode( - ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op, - getConstant(0, SDLoc(Op), TLI->getVectorIdxTy(getDataLayout()))); + return getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op, + getVectorIdxConstant(0, SDLoc(Op))); }; // At each stage, we're looking for something that looks like: @@ -9246,6 +9390,28 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, PrevOp = Op; } + // Handle subvector reductions, which tend to appear after the shuffle + // reduction stages. + while (Op.getOpcode() == CandidateBinOp) { + unsigned NumElts = Op.getValueType().getVectorNumElements(); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + if (Op0.getOpcode() != ISD::EXTRACT_SUBVECTOR || + Op1.getOpcode() != ISD::EXTRACT_SUBVECTOR || + Op0.getOperand(0) != Op1.getOperand(0)) + break; + SDValue Src = Op0.getOperand(0); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + if (NumSrcElts != (2 * NumElts)) + break; + if (!(Op0.getConstantOperandAPInt(1) == 0 && + Op1.getConstantOperandAPInt(1) == NumElts) && + !(Op1.getConstantOperandAPInt(1) == 0 && + Op0.getConstantOperandAPInt(1) == NumElts)) + break; + Op = Src; + } + BinOp = (ISD::NodeType)CandidateBinOp; return Op; } @@ -9276,9 +9442,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { if (OperandVT.isVector()) { // A vector operand; extract a single element. 
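The flag-aware folds above let an nnan or ninf operation with a disallowed (or undef) operand collapse to undef, since its result would be poison. A scalar model over IEEE doubles, representing undef as an empty optional:

    #include <cmath>
    #include <iostream>
    #include <optional>

    // Model: under nnan, a NaN or undef operand makes the result undef;
    // likewise ninf with infinities.
    std::optional<double> simplifyFAdd(std::optional<double> X,
                                       std::optional<double> Y,
                                       bool NoNaNs, bool NoInfs) {
      if (NoNaNs && (!X || !Y || std::isnan(*X) || std::isnan(*Y)))
        return std::nullopt;
      if (NoInfs && (!X || !Y || std::isinf(*X) || std::isinf(*Y)))
        return std::nullopt;
      if (!X || !Y)
        return std::nullopt; // plain undef operands: no defined sum
      return *X + *Y;
    }

    int main() {
      std::cout << simplifyFAdd(1.0, 2.0, true, false).has_value()  // 1
                << simplifyFAdd(NAN, 2.0, true, false).has_value()  // 0
                << '\n';
    }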
EVT OperandEltVT = OperandVT.getVectorElementType(); - Operands[j] = - getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, - getConstant(i, dl, TLI->getVectorIdxTy(getDataLayout()))); + Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, + Operand, getVectorIdxConstant(i, dl)); } else { // A scalar operand; just use it as is. Operands[j] = Operand; @@ -9395,9 +9560,9 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, return false; } -/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if -/// it cannot be inferred. -unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { +/// InferPtrAlignment - Infer alignment of a load / store address. Return None +/// if it cannot be inferred. +MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const { // If this is a GlobalAddress + cst, return the alignment. const GlobalValue *GV = nullptr; int64_t GVOffset = 0; @@ -9406,9 +9571,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { KnownBits Known(PtrWidth); llvm::computeKnownBits(GV, Known, getDataLayout()); unsigned AlignBits = Known.countMinTrailingZeros(); - unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; - if (Align) - return MinAlign(Align, GVOffset); + if (AlignBits) + return commonAlignment(Align(1ull << std::min(31U, AlignBits)), GVOffset); } // If this is a direct reference to a stack slot, use information about the @@ -9426,12 +9590,10 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { if (FrameIdx != INT_MIN) { const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); - unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), - FrameOffset); - return FIInfoAlign; + return commonAlignment(MFI.getObjectAlign(FrameIdx), FrameOffset); } - return 0; + return None; } /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type @@ -9447,20 +9609,58 @@ std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const { return std::make_pair(LoVT, HiVT); } +/// GetDependentSplitDestVTs - Compute the VTs needed for the low/hi parts of a +/// type, dependent on an enveloping VT that has been split into two identical +/// pieces. Sets the HiIsEmpty flag when hi type has zero storage size. +std::pair<EVT, EVT> +SelectionDAG::GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT, + bool *HiIsEmpty) const { + EVT EltTp = VT.getVectorElementType(); + bool IsScalable = VT.isScalableVector(); + // Examples: + // custom VL=8 with enveloping VL=8/8 yields 8/0 (hi empty) + // custom VL=9 with enveloping VL=8/8 yields 8/1 + // custom VL=10 with enveloping VL=8/8 yields 8/2 + // etc. + unsigned VTNumElts = VT.getVectorNumElements(); + unsigned EnvNumElts = EnvVT.getVectorNumElements(); + EVT LoVT, HiVT; + if (VTNumElts > EnvNumElts) { + LoVT = EnvVT; + HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts, + IsScalable); + *HiIsEmpty = false; + } else { + // Flag that hi type has zero storage size, but return split envelop type + // (this would be easier if vector types with zero elements were allowed). + LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts, IsScalable); + HiVT = EnvVT; + *HiIsEmpty = true; + } + return std::make_pair(LoVT, HiVT); +} + /// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the /// low/high part. 
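The worked examples in the GetDependentSplitDestVTs comment (a 9-element type split against an 8/8 envelope yields 8/1, and so on) are easy to model over plain element counts:

    #include <iostream>
    #include <utility>

    // Model: split VTNumElts against an envelope half of EnvNumElts.
    // Returns {lo, hi} counts; HiIsEmpty marks a hi part that exists only
    // as a type and holds no elements.
    std::pair<unsigned, unsigned> dependentSplit(unsigned VTNumElts,
                                                 unsigned EnvNumElts,
                                                 bool &HiIsEmpty) {
      if (VTNumElts > EnvNumElts) {
        HiIsEmpty = false;
        return {EnvNumElts, VTNumElts - EnvNumElts};
      }
      HiIsEmpty = true; // hi is the envelope type with zero storage used
      return {VTNumElts, EnvNumElts};
    }

    int main() {
      bool Empty;
      auto R = dependentSplit(9, 8, Empty); // VL=9 against 8/8
      std::cout << R.first << '/' << R.second
                << " HiIsEmpty=" << Empty << '\n'; // 8/1 HiIsEmpty=0
    }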
std::pair<SDValue, SDValue> SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT) { - assert(LoVT.getVectorNumElements() + HiVT.getVectorNumElements() <= - N.getValueType().getVectorNumElements() && + assert(LoVT.isScalableVector() == HiVT.isScalableVector() && + LoVT.isScalableVector() == N.getValueType().isScalableVector() && + "Splitting vector with an invalid mixture of fixed and scalable " + "vector types"); + assert(LoVT.getVectorMinNumElements() + HiVT.getVectorMinNumElements() <= + N.getValueType().getVectorMinNumElements() && "More vector elements requested than available!"); SDValue Lo, Hi; - Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, - getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout()))); + Lo = + getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, getVectorIdxConstant(0, DL)); + // For scalable vectors it is safe to use LoVT.getVectorMinNumElements() + // (rather than having to use ElementCount), because EXTRACT_SUBVECTOR scales + // IDX with the runtime scaling factor of the result vector type. For + // fixed-width result vectors, that runtime scaling factor is 1. Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N, - getConstant(LoVT.getVectorNumElements(), DL, - TLI->getVectorIdxTy(getDataLayout()))); + getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL)); return std::make_pair(Lo, Hi); } @@ -9470,22 +9670,22 @@ SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) { EVT WideVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(), NextPowerOf2(VT.getVectorNumElements())); return getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, getUNDEF(WideVT), N, - getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout()))); + getVectorIdxConstant(0, DL)); } void SelectionDAG::ExtractVectorElements(SDValue Op, SmallVectorImpl<SDValue> &Args, - unsigned Start, unsigned Count) { + unsigned Start, unsigned Count, + EVT EltVT) { EVT VT = Op.getValueType(); if (Count == 0) Count = VT.getVectorNumElements(); - - EVT EltVT = VT.getVectorElementType(); - EVT IdxTy = TLI->getVectorIdxTy(getDataLayout()); + if (EltVT == EVT()) + EltVT = VT.getVectorElementType(); SDLoc SL(Op); for (unsigned i = Start, e = Start + Count; i != e; ++i) { - Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, - Op, getConstant(i, SL, IdxTy))); + Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Op, + getVectorIdxConstant(i, SL))); } }
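ExtractVectorElements (and UnrollVectorOp earlier) scalarizes a vector by emitting one EXTRACT_VECTOR_ELT per lane. The same unrolling over a plain array, with the Count==0 default slightly adjusted so it stays in bounds for a nonzero Start:

    #include <iostream>
    #include <vector>

    // Model of ExtractVectorElements: pull Count lanes starting at Start
    // out of a vector value into a flat list of scalars.
    template <typename T>
    std::vector<T> extractElements(const std::vector<T> &Vec,
                                   unsigned Start, unsigned Count) {
      if (Count == 0)
        Count = Vec.size() - Start; // default: all remaining lanes
      return {Vec.begin() + Start, Vec.begin() + Start + Count};
    }

    int main() {
      std::vector<int> V{10, 20, 30, 40};
      for (int X : extractElements(V, 1, 2))
        std::cout << X << ' '; // 20 30
      std::cout << '\n';
    }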