diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2019-10-23 17:51:42 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-10-23 17:51:42 +0000 |
commit | 1d5ae1026e831016fc29fd927877c86af904481f (patch) | |
tree | 2cdfd12620fcfa5d9e4a0389f85368e8e36f63f9 /lib/CodeGen/SelectionDAG/SelectionDAG.cpp | |
parent | e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff) |
Notes
Diffstat (limited to 'lib/CodeGen/SelectionDAG/SelectionDAG.cpp')
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 283 |
1 files changed, 160 insertions, 123 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 5852e693fa9f..52a71b91d93f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -859,9 +859,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { break; case ISD::TargetExternalSymbol: { ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N); - Erased = TargetExternalSymbols.erase( - std::pair<std::string,unsigned char>(ESN->getSymbol(), - ESN->getTargetFlags())); + Erased = TargetExternalSymbols.erase(std::pair<std::string, unsigned>( + ESN->getSymbol(), ESN->getTargetFlags())); break; } case ISD::MCSymbol: { @@ -1084,6 +1083,7 @@ void SelectionDAG::clear() { ExternalSymbols.clear(); TargetExternalSymbols.clear(); MCSymbols.clear(); + SDCallSiteDbgInfo.clear(); std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), static_cast<CondCodeSDNode*>(nullptr)); std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), @@ -1353,7 +1353,7 @@ SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT, SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t Offset, bool isTargetGA, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTargetGA) && "Cannot set target flags on target-independent globals"); @@ -1400,7 +1400,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { } SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent jump tables"); unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable; @@ -1421,7 +1421,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, unsigned Alignment, int Offset, bool isTarget, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) @@ -1449,7 +1449,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, unsigned Alignment, int Offset, bool isTarget, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) @@ -1473,7 +1473,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, } SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, - unsigned char TargetFlags) { + unsigned TargetFlags) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None); ID.AddInteger(Index); @@ -1535,10 +1535,9 @@ SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) { } SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, - unsigned char TargetFlags) { + unsigned TargetFlags) { SDNode *&N = - TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym, - TargetFlags)]; + TargetExternalSymbols[std::pair<std::string, unsigned>(Sym, TargetFlags)]; if (N) return SDValue(N, 0); N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, VT); InsertNode(N); @@ -1802,9 +1801,8 @@ SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl, } SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, - int64_t Offset, - bool isTarget, - unsigned char TargetFlags) { + int64_t Offset, bool isTarget, + unsigned TargetFlags) { unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress; FoldingSetNodeID ID; @@ -1900,20 +1898,19 @@ SDValue SelectionDAG::expandVAArg(SDNode *Node) { EVT VT = Node->getValueType(0); SDValue Tmp1 = Node->getOperand(0); SDValue Tmp2 = Node->getOperand(1); - unsigned Align = Node->getConstantOperandVal(3); + const MaybeAlign MA(Node->getConstantOperandVal(3)); SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, Tmp2, MachinePointerInfo(V)); SDValue VAList = VAListLoad; - if (Align > TLI.getMinStackArgumentAlignment()) { - assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); - + if (MA && *MA > TLI.getMinStackArgumentAlignment()) { VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - getConstant(Align - 1, dl, VAList.getValueType())); + getConstant(MA->value() - 1, dl, VAList.getValueType())); - VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList, - getConstant(-(int64_t)Align, dl, VAList.getValueType())); + VAList = + getNode(ISD::AND, dl, VAList.getValueType(), VAList, + getConstant(-(int64_t)MA->value(), dl, VAList.getValueType())); } // Increment the pointer, VAList, to the next vaarg @@ -2154,12 +2151,9 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, } case ISD::OR: case ISD::XOR: - // If the LHS or RHS don't contribute bits to the or, drop them. - if (MaskedValueIsZero(V.getOperand(0), DemandedBits)) - return V.getOperand(1); - if (MaskedValueIsZero(V.getOperand(1), DemandedBits)) - return V.getOperand(0); - break; + case ISD::SIGN_EXTEND_INREG: + return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts, + *this, 0); case ISD::SRL: // Only look at single-use SRLs. if (!V.getNode()->hasOneUse()) @@ -2203,15 +2197,6 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc); break; } - case ISD::SIGN_EXTEND_INREG: - EVT ExVT = cast<VTSDNode>(V.getOperand(1))->getVT(); - unsigned ExVTBits = ExVT.getScalarSizeInBits(); - - // If none of the extended bits are demanded, eliminate the sextinreg. - if (DemandedBits.getActiveBits() <= ExVTBits) - return V.getOperand(0); - - break; } return SDValue(); } @@ -2395,15 +2380,39 @@ SDValue SelectionDAG::getSplatValue(SDValue V) { /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that /// is less than the element bit-width of the shift node, return it. static const APInt *getValidShiftAmountConstant(SDValue V) { + unsigned BitWidth = V.getScalarValueSizeInBits(); if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) { // Shifting more than the bitwidth is not valid. const APInt &ShAmt = SA->getAPIntValue(); - if (ShAmt.ult(V.getScalarValueSizeInBits())) + if (ShAmt.ult(BitWidth)) return &ShAmt; } return nullptr; } +/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less +/// than the element bit-width of the shift node, return the minimum value. +static const APInt *getValidMinimumShiftAmountConstant(SDValue V) { + unsigned BitWidth = V.getScalarValueSizeInBits(); + auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1)); + if (!BV) + return nullptr; + const APInt *MinShAmt = nullptr; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i)); + if (!SA) + return nullptr; + // Shifting more than the bitwidth is not valid. + const APInt &ShAmt = SA->getAPIntValue(); + if (ShAmt.uge(BitWidth)) + return nullptr; + if (MinShAmt && MinShAmt->ule(ShAmt)) + continue; + MinShAmt = &ShAmt; + } + return MinShAmt; +} + /// Determine which bits of Op are known to be either zero or one and return /// them in Known. For vectors, the known bits are those that are shared by /// every vector element. @@ -2437,7 +2446,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, return Known; } - if (Depth == 6) + if (Depth >= MaxRecursionDepth) return Known; // Limit search depth. KnownBits Known2; @@ -2582,14 +2591,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, SDValue Src = Op.getOperand(0); ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts); if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { // Offset the demanded elts by the subvector index. uint64_t Idx = SubIdx->getZExtValue(); - APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - Known = computeKnownBits(Src, DemandedSrc, Depth + 1); - } else { - Known = computeKnownBits(Src, Depth + 1); + DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); } + Known = computeKnownBits(Src, DemandedSrc, Depth + 1); break; } case ISD::SCALAR_TO_VECTOR: { @@ -2800,25 +2808,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.One.lshrInPlace(Shift); // High bits are known zero. Known.Zero.setHighBits(Shift); - } else if (auto *BV = dyn_cast<BuildVectorSDNode>(Op.getOperand(1))) { - // If the shift amount is a vector of constants see if we can bound - // the number of upper zero bits. - unsigned ShiftAmountMin = BitWidth; - for (unsigned i = 0; i != BV->getNumOperands(); ++i) { - if (auto *C = dyn_cast<ConstantSDNode>(BV->getOperand(i))) { - const APInt &ShAmt = C->getAPIntValue(); - if (ShAmt.ult(BitWidth)) { - ShiftAmountMin = std::min<unsigned>(ShiftAmountMin, - ShAmt.getZExtValue()); - continue; - } - } - // Don't know anything. - ShiftAmountMin = 0; - break; - } - - Known.Zero.setHighBits(ShiftAmountMin); + } else if (const APInt *ShMinAmt = getValidMinimumShiftAmountConstant(Op)) { + // Minimum shift high bits are known zero. + Known.Zero.setHighBits(ShMinAmt->getZExtValue()); } break; case ISD::SRA: @@ -3105,12 +3097,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, // If the first operand is non-negative or has all low bits zero, then // the upper bits are all zero. - if (Known2.Zero[BitWidth-1] || ((Known2.Zero & LowBits) == LowBits)) + if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero)) Known.Zero |= ~LowBits; // If the first operand is negative and not all low bits are zero, then // the upper bits are all one. - if (Known2.One[BitWidth-1] && ((Known2.One & LowBits) != 0)) + if (Known2.isNegative() && LowBits.intersects(Known2.One)) Known.One |= ~LowBits; assert((Known.Zero & Known.One) == 0&&"Bits known to be one AND zero?"); } @@ -3427,7 +3419,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return Val.getNumSignBits(); } - if (Depth == 6) + if (Depth >= MaxRecursionDepth) return 1; // Limit search depth. if (!DemandedElts) @@ -3729,6 +3721,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); if (Tmp == 1) return 1; // Early out. return std::min(Tmp, Tmp2)-1; + case ISD::MUL: { + // The output of the Mul can be at most twice the valid bits in the inputs. + unsigned SignBitsOp0 = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (SignBitsOp0 == 1) + break; + unsigned SignBitsOp1 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); + if (SignBitsOp1 == 1) + break; + unsigned OutValidBits = + (VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1); + return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1; + } case ISD::TRUNCATE: { // Check if the sign bits of source go down as far as the truncated value. unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits(); @@ -3817,13 +3821,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, SDValue Src = Op.getOperand(0); ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts); if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { // Offset the demanded elts by the subvector index. uint64_t Idx = SubIdx->getZExtValue(); - APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); + DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); } - return ComputeNumSignBits(Src, Depth + 1); + return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); } case ISD::CONCAT_VECTORS: { // Determine the minimum number of sign bits across all demanded @@ -3976,7 +3980,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs()) return true; - if (Depth == 6) + if (Depth >= MaxRecursionDepth) return false; // Limit search depth. // TODO: Handle vectors. @@ -4645,7 +4649,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return getUNDEF(VT); // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) && + if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) && OpOpcode == ISD::FSUB) return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), Operand.getOperand(0), Flags); @@ -5156,22 +5160,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (N2C && N2C->isNullValue()) return N1; break; - case ISD::FP_ROUND_INREG: { - EVT EVT = cast<VTSDNode>(N2)->getVT(); - assert(VT == N1.getValueType() && "Not an inreg round!"); - assert(VT.isFloatingPoint() && EVT.isFloatingPoint() && - "Cannot FP_ROUND_INREG integer types"); - assert(EVT.isVector() == VT.isVector() && - "FP_ROUND_INREG type should be vector iff the operand " - "type is vector!"); - assert((!EVT.isVector() || - EVT.getVectorNumElements() == VT.getVectorNumElements()) && - "Vector element counts must match in FP_ROUND_INREG"); - assert(EVT.bitsLE(VT) && "Not rounding down!"); - (void)EVT; - if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding. - break; - } case ISD::FP_ROUND: assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() && @@ -5382,7 +5370,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, std::swap(N1, N2); } else { switch (Opcode) { - case ISD::FP_ROUND_INREG: case ISD::SIGN_EXTEND_INREG: case ISD::SUB: return getUNDEF(VT); // fold op(undef, arg2) -> undef @@ -5770,7 +5757,7 @@ static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - uint64_t Size, unsigned Align, + uint64_t Size, unsigned Alignment, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { @@ -5795,15 +5782,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; unsigned SrcAlign = DAG.InferPtrAlignment(Src); - if (Align > SrcAlign) - SrcAlign = Align; + if (Alignment > SrcAlign) + SrcAlign = Alignment; ConstantDataArraySlice Slice; bool CopyFromConstant = isMemSrcFromConstant(Src, Slice); bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); if (!TLI.findOptimalMemOpLowering( - MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), + MemOps, Limit, Size, (DstAlignCanChange ? 0 : Alignment), (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant, /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), @@ -5818,15 +5805,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, // realignment. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) - while (NewAlign > Align && - DL.exceedsNaturalStackAlignment(NewAlign)) - NewAlign /= 2; + while (NewAlign > Alignment && + DL.exceedsNaturalStackAlignment(Align(NewAlign))) + NewAlign /= 2; - if (NewAlign > Align) { + if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) MFI.setObjectAlignment(FI->getIndex(), NewAlign); - Align = NewAlign; + Alignment = NewAlign; } } @@ -5869,10 +5856,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, } Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice); if (Value.getNode()) { - Store = DAG.getStore(Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Align, - MMOFlags); + Store = DAG.getStore( + Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), + DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags); OutChains.push_back(Store); } } @@ -5900,7 +5886,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags); + DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags); OutStoreChains.push_back(Store); } SrcOff += VTSize; @@ -6567,7 +6553,7 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) { SDValue SelectionDAG::getMemIntrinsicNode( unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, - MachineMemOperand::Flags Flags, unsigned Size, const AAMDNodes &AAInfo) { + MachineMemOperand::Flags Flags, uint64_t Size, const AAMDNodes &AAInfo) { if (Align == 0) // Ensure that codegen never sees alignment 0 Align = getEVTAlignment(MemVT); @@ -6619,7 +6605,9 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, createOperands(N, Ops); } InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl, @@ -7022,14 +7010,15 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, - MachineMemOperand *MMO) { + MachineMemOperand *MMO, + ISD::MemIndexType IndexType) { assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); ID.AddInteger(VT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>( - dl.getIROrder(), VTs, VT, MMO)); + dl.getIROrder(), VTs, VT, MMO, IndexType)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -7038,7 +7027,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, } auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), - VTs, VT, MMO); + VTs, VT, MMO, IndexType); createOperands(N, Ops); assert(N->getPassThru().getValueType() == N->getValueType(0) && @@ -7062,14 +7051,15 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, - MachineMemOperand *MMO) { + MachineMemOperand *MMO, + ISD::MemIndexType IndexType) { assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); ID.AddInteger(VT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>( - dl.getIROrder(), VTs, VT, MMO)); + dl.getIROrder(), VTs, VT, MMO, IndexType)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -7077,7 +7067,7 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, return SDValue(E, 0); } auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), - VTs, VT, MMO); + VTs, VT, MMO, IndexType); createOperands(N, Ops); assert(N->getMask().getValueType().getVectorNumElements() == @@ -7766,16 +7756,22 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break; case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break; case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break; + case ISD::STRICT_LRINT: NewOpc = ISD::LRINT; break; + case ISD::STRICT_LLRINT: NewOpc = ISD::LLRINT; break; case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break; case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break; case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break; case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break; case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break; case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break; + case ISD::STRICT_LROUND: NewOpc = ISD::LROUND; break; + case ISD::STRICT_LLROUND: NewOpc = ISD::LLROUND; break; case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break; case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break; case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break; case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break; + case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break; + case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break; } assert(Node->getNumValues() == 2 && "Unexpected number of results!"); @@ -7925,6 +7921,7 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL, CSEMap.InsertNode(N, IP); InsertNode(N); + NewSDValueDbgMsg(SDValue(N, 0), "Creating new machine node: ", this); return N; } @@ -8619,7 +8616,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, // TokenFactor. SDValue OldChain = SDValue(OldLoad, 1); SDValue NewChain = SDValue(NewMemOp.getNode(), 1); - if (!OldLoad->hasAnyUseOfValue(1)) + if (OldChain == NewChain || !OldLoad->hasAnyUseOfValue(1)) return NewChain; SDValue TokenFactor = @@ -8812,7 +8809,7 @@ HandleSDNode::~HandleSDNode() { GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, const GlobalValue *GA, EVT VT, - int64_t o, unsigned char TF) + int64_t o, unsigned TF) : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { TheGlobal = GA; } @@ -8986,7 +8983,7 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, // Loads don't have side effects, look through them. if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) { - if (!Ld->isVolatile()) + if (Ld->isUnordered()) return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1); } return false; @@ -9005,21 +9002,51 @@ void SDNode::intersectFlagsWith(const SDNodeFlags Flags) { SDValue SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, - ArrayRef<ISD::NodeType> CandidateBinOps) { + ArrayRef<ISD::NodeType> CandidateBinOps, + bool AllowPartials) { // The pattern must end in an extract from index 0. if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isNullConstant(Extract->getOperand(1))) return SDValue(); - SDValue Op = Extract->getOperand(0); - unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements()); - // Match against one of the candidate binary ops. + SDValue Op = Extract->getOperand(0); if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) { return Op.getOpcode() == unsigned(BinOp); })) return SDValue(); + // Floating-point reductions may require relaxed constraints on the final step + // of the reduction because they may reorder intermediate operations. + unsigned CandidateBinOp = Op.getOpcode(); + if (Op.getValueType().isFloatingPoint()) { + SDNodeFlags Flags = Op->getFlags(); + switch (CandidateBinOp) { + case ISD::FADD: + if (!Flags.hasNoSignedZeros() || !Flags.hasAllowReassociation()) + return SDValue(); + break; + default: + llvm_unreachable("Unhandled FP opcode for binop reduction"); + } + } + + // Matching failed - attempt to see if we did enough stages that a partial + // reduction from a subvector is possible. + auto PartialReduction = [&](SDValue Op, unsigned NumSubElts) { + if (!AllowPartials || !Op) + return SDValue(); + EVT OpVT = Op.getValueType(); + EVT OpSVT = OpVT.getScalarType(); + EVT SubVT = EVT::getVectorVT(*getContext(), OpSVT, NumSubElts); + if (!TLI->isExtractSubvectorCheap(SubVT, OpVT, 0)) + return SDValue(); + BinOp = (ISD::NodeType)CandidateBinOp; + return getNode( + ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op, + getConstant(0, SDLoc(Op), TLI->getVectorIdxTy(getDataLayout()))); + }; + // At each stage, we're looking for something that looks like: // %s = shufflevector <8 x i32> %op, <8 x i32> undef, // <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, @@ -9030,10 +9057,16 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, // <4,5,6,7,u,u,u,u> // <2,3,u,u,u,u,u,u> // <1,u,u,u,u,u,u,u> - unsigned CandidateBinOp = Op.getOpcode(); + // While a partial reduction match would be: + // <2,3,u,u,u,u,u,u> + // <1,u,u,u,u,u,u,u> + unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements()); + SDValue PrevOp; for (unsigned i = 0; i < Stages; ++i) { + unsigned MaskEnd = (1 << i); + if (Op.getOpcode() != CandidateBinOp) - return SDValue(); + return PartialReduction(PrevOp, MaskEnd); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -9049,12 +9082,14 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, // The first operand of the shuffle should be the same as the other operand // of the binop. if (!Shuffle || Shuffle->getOperand(0) != Op) - return SDValue(); + return PartialReduction(PrevOp, MaskEnd); // Verify the shuffle has the expected (at this stage of the pyramid) mask. - for (int Index = 0, MaskEnd = 1 << i; Index < MaskEnd; ++Index) - if (Shuffle->getMaskElt(Index) != MaskEnd + Index) - return SDValue(); + for (int Index = 0; Index < (int)MaskEnd; ++Index) + if (Shuffle->getMaskElt(Index) != (int)(MaskEnd + Index)) + return PartialReduction(PrevOp, MaskEnd); + + PrevOp = Op; } BinOp = (ISD::NodeType)CandidateBinOp; @@ -9114,8 +9149,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { getShiftAmountOperand(Operands[0].getValueType(), Operands[1]))); break; - case ISD::SIGN_EXTEND_INREG: - case ISD::FP_ROUND_INREG: { + case ISD::SIGN_EXTEND_INREG: { EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType(); Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], @@ -9187,6 +9221,9 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, int Dist) const { if (LD->isVolatile() || Base->isVolatile()) return false; + // TODO: probably too restrictive for atomics, revisit + if (!LD->isSimple()) + return false; if (LD->isIndexed() || Base->isIndexed()) return false; if (LD->getChain() != Base->getChain()) |