Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp  894
1 file changed, 623 insertions, 271 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 30d202494320..5be1892a44f6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -27,6 +27,7 @@
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
@@ -161,8 +162,13 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
   unsigned SplatBitSize;
   bool HasUndefs;
   unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+  // Endianness does not matter here. We are checking for a splat given the
+  // element size of the vector, and if we find such a splat for little endian
+  // layout, then that should be valid also for big endian (as the full vector
+  // size is known to be a multiple of the element size).
+  const bool IsBigEndian = false;
   return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs,
-                             EltSize) &&
+                             EltSize, IsBigEndian) &&
          EltSize == SplatBitSize;
 }
 
@@ -344,12 +350,13 @@ bool ISD::isFreezeUndef(const SDNode *N) {
   return N->getOpcode() == ISD::FREEZE && N->getOperand(0).isUndef();
 }
 
-bool ISD::matchUnaryPredicate(SDValue Op,
-                              std::function<bool(ConstantSDNode *)> Match,
-                              bool AllowUndefs) {
+template <typename ConstNodeType>
+bool ISD::matchUnaryPredicateImpl(SDValue Op,
+                                  std::function<bool(ConstNodeType *)> Match,
+                                  bool AllowUndefs) {
   // FIXME: Add support for scalar UNDEF cases?
-  if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
-    return Match(Cst);
+  if (auto *C = dyn_cast<ConstNodeType>(Op))
+    return Match(C);
 
   // FIXME: Add support for vector UNDEF cases?
   if (ISD::BUILD_VECTOR != Op.getOpcode() &&
@@ -364,12 +371,17 @@ bool ISD::matchUnaryPredicate(SDValue Op,
       continue;
     }
 
-    auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
+    auto *Cst = dyn_cast<ConstNodeType>(Op.getOperand(i));
     if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
       return false;
   }
   return true;
 }
+// Build used template types.
+template bool ISD::matchUnaryPredicateImpl<ConstantSDNode>(
+    SDValue, std::function<bool(ConstantSDNode *)>, bool);
+template bool ISD::matchUnaryPredicateImpl<ConstantFPSDNode>(
+    SDValue, std::function<bool(ConstantFPSDNode *)>, bool);
 
 bool ISD::matchBinaryPredicate(
     SDValue LHS, SDValue RHS,
@@ -951,7 +963,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
 /// doNotCSE - Return true if CSE should not be performed for this node.
 static bool doNotCSE(SDNode *N) {
   if (N->getValueType(0) == MVT::Glue)
-    return true; // Never CSE anything that produces a flag.
+    return true; // Never CSE anything that produces a glue result.
 
   switch (N->getOpcode()) {
   default: break;
@@ -963,7 +975,7 @@
   // Check that remaining values produced are not flags.
   for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
     if (N->getValueType(i) == MVT::Glue)
-      return true; // Never CSE anything that produces a flag.
+      return true; // Never CSE anything that produces a glue result.
 
   return false;
 }
@@ -1197,7 +1209,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
   }
 #ifndef NDEBUG
   // Verify that the node was actually in one of the CSE maps, unless it has a
-  // flag result (which cannot be CSE'd) or is one of the special cases that are
+  // glue result (which cannot be CSE'd) or is one of the special cases that are
   // not subject to CSE.
   if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue &&
       !N->isMachineOpcode() && !doNotCSE(N)) {
@@ -1296,17 +1308,16 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
 }
 
 Align SelectionDAG::getEVTAlign(EVT VT) const {
-  Type *Ty = VT == MVT::iPTR ?
-      PointerType::get(Type::getInt8Ty(*getContext()), 0) :
-      VT.getTypeForEVT(*getContext());
+  Type *Ty = VT == MVT::iPTR ? PointerType::get(*getContext(), 0)
+                             : VT.getTypeForEVT(*getContext());
 
   return getDataLayout().getABITypeAlign(Ty);
 }
 
 // EntryNode could meaningfully have debug info if we can find it...
-SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
-    : TM(tm), OptLevel(OL),
-      EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other, MVT::Glue)),
+SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOptLevel OL)
+    : TM(tm), OptLevel(OL), EntryNode(ISD::EntryToken, 0, DebugLoc(),
+                                      getVTList(MVT::Other, MVT::Glue)),
       Root(getEntryNode()) {
   InsertNode(&EntryNode);
   DbgInfo = new SDDbgInfo();
@@ -1454,6 +1465,51 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
              getNode(ISD::TRUNCATE, DL, VT, Op);
 }
 
+SDValue SelectionDAG::getBitcastedAnyExtOrTrunc(SDValue Op, const SDLoc &DL,
+                                                EVT VT) {
+  assert(!VT.isVector());
+  auto Type = Op.getValueType();
+  SDValue DestOp;
+  if (Type == VT)
+    return Op;
+  auto Size = Op.getValueSizeInBits();
+  DestOp = getBitcast(MVT::getIntegerVT(Size), Op);
+  if (DestOp.getValueType() == VT)
+    return DestOp;
+
+  return getAnyExtOrTrunc(DestOp, DL, VT);
+}
+
+SDValue SelectionDAG::getBitcastedSExtOrTrunc(SDValue Op, const SDLoc &DL,
+                                              EVT VT) {
+  assert(!VT.isVector());
+  auto Type = Op.getValueType();
+  SDValue DestOp;
+  if (Type == VT)
+    return Op;
+  auto Size = Op.getValueSizeInBits();
+  DestOp = getBitcast(MVT::getIntegerVT(Size), Op);
+  if (DestOp.getValueType() == VT)
+    return DestOp;
+
+  return getSExtOrTrunc(DestOp, DL, VT);
+}
+
+SDValue SelectionDAG::getBitcastedZExtOrTrunc(SDValue Op, const SDLoc &DL,
+                                              EVT VT) {
+  assert(!VT.isVector());
+  auto Type = Op.getValueType();
+  SDValue DestOp;
+  if (Type == VT)
+    return Op;
+  auto Size = Op.getValueSizeInBits();
+  DestOp = getBitcast(MVT::getIntegerVT(Size), Op);
+  if (DestOp.getValueType() == VT)
+    return DestOp;
+
+  return getZExtOrTrunc(DestOp, DL, VT);
+}
+
 SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT,
                                         EVT OpVT) {
   if (VT.bitsLE(Op.getValueType()))
@@ -1570,7 +1626,11 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
   if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) ==
                            TargetLowering::TypePromoteInteger) {
     EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
-    APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
+    APInt NewVal;
+    if (TLI->isSExtCheaperThanZExt(VT.getScalarType(), EltVT))
+      NewVal = Elt->getValue().sextOrTrunc(EltVT.getSizeInBits());
+    else
+      NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
     Elt = ConstantInt::get(*getContext(), NewVal);
   }
   // In other cases the element type is illegal and needs to be expanded, for
@@ -1587,7 +1647,8 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
     unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();
 
     // For scalable vectors, try to use a SPLAT_VECTOR_PARTS node.
-    if (VT.isScalableVector()) {
+    if (VT.isScalableVector() ||
+        TLI->isOperationLegal(ISD::SPLAT_VECTOR, VT)) {
       assert(EltVT.getSizeInBits() % ViaEltSizeInBits == 0 &&
              "Can only handle an even split!");
       unsigned Parts = EltVT.getSizeInBits() / ViaEltSizeInBits;
@@ -1801,6 +1862,13 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
   return SDValue(N, 0);
 }
 
+SDValue SelectionDAG::getJumpTableDebugInfo(int JTI, SDValue Chain,
+                                            const SDLoc &DL) {
+  EVT PTy = getTargetLoweringInfo().getPointerTy(getDataLayout());
+  return getNode(ISD::JUMP_TABLE_DEBUG_INFO, DL, MVT::Glue, Chain,
+                 getTargetConstant(static_cast<uint64_t>(JTI), DL, PTy, true));
+}
+
 SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
                                       MaybeAlign Alignment, int Offset,
                                       bool isTarget, unsigned TargetFlags) {
@@ -1855,23 +1923,6 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
   return SDValue(N, 0);
 }
 
-SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
-                                     unsigned TargetFlags) {
-  FoldingSetNodeID ID;
-  AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), std::nullopt);
-  ID.AddInteger(Index);
-  ID.AddInteger(Offset);
-  ID.AddInteger(TargetFlags);
-  void *IP = nullptr;
-  if (SDNode *E = FindNodeOrInsertPos(ID, IP))
-    return SDValue(E, 0);
-
-  auto *N = newSDNode<TargetIndexSDNode>(Index, VT, Offset, TargetFlags);
-  CSEMap.InsertNode(N, IP);
-  InsertNode(N);
-  return SDValue(N, 0);
-}
-
 SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), std::nullopt);
@@ -1950,13 +2001,10 @@ SDValue SelectionDAG::getVScale(const SDLoc &DL, EVT VT, APInt MulImm,
 
   if (ConstantFold) {
     const MachineFunction &MF = getMachineFunction();
-    auto Attr = MF.getFunction().getFnAttribute(Attribute::VScaleRange);
-    if (Attr.isValid()) {
-      unsigned VScaleMin = Attr.getVScaleRangeMin();
-      if (std::optional<unsigned> VScaleMax = Attr.getVScaleRangeMax())
-        if (*VScaleMax == VScaleMin)
-          return getConstant(MulImm * VScaleMin, DL, VT);
-    }
+    const Function &F = MF.getFunction();
+    ConstantRange CR = getVScaleRange(&F, 64);
+    if (const APInt *C = CR.getSingleElement())
+      return getConstant(MulImm * C->getZExtValue(), DL, VT);
   }
 
   return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT));
@@ -2121,11 +2169,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
     if (Splat && UndefElements.none()) {
       // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
      // number of elements match or the value splatted is a zero constant.
-      if (SameNumElts)
+      if (SameNumElts || isNullConstant(Splat))
         return N1;
-      if (auto *C = dyn_cast<ConstantSDNode>(Splat))
-        if (C->isZero())
-          return N1;
     }
 
     // If the shuffle itself creates a splat, build the vector directly.
@@ -2490,7 +2535,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
 
     // icmp X, X -> true/false
     // icmp X, undef -> true/false because undef could be X.
-    if (N1 == N2)
+    if (N1.isUndef() || N2.isUndef() || N1 == N2)
       return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT);
   }
 
@@ -2836,6 +2881,12 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
     }
   }
 
+  // Fallback - this is a splat if all demanded elts are the same constant.
+  if (computeKnownBits(V, DemandedElts, Depth).isConstant()) {
+    UndefElts = ~DemandedElts;
+    return true;
+  }
+
   return false;
 }
 
@@ -3057,6 +3108,15 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
       Known = computeKnownBits(SrcOp, Depth + 1).trunc(BitWidth);
     break;
   }
+  case ISD::SPLAT_VECTOR_PARTS: {
+    unsigned ScalarSize = Op.getOperand(0).getScalarValueSizeInBits();
+    assert(ScalarSize * Op.getNumOperands() == BitWidth &&
+           "Expected SPLAT_VECTOR_PARTS scalars to cover element width");
+    for (auto [I, SrcOp] : enumerate(Op->ops())) {
+      Known.insertBits(computeKnownBits(SrcOp, Depth + 1), ScalarSize * I);
+    }
+    break;
+  }
   case ISD::BUILD_VECTOR:
     assert(!Op.getValueType().isScalableVector());
     // Collect the known bits that are shared by every demanded vector element.
@@ -3688,14 +3748,19 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     assert(Op.getResNo() == 0 &&
            "We only compute knownbits for the difference here.");
 
-    // TODO: Compute influence of the carry operand.
-    if (Opcode == ISD::USUBO_CARRY || Opcode == ISD::SSUBO_CARRY)
-      break;
+    // With USUBO_CARRY and SSUBO_CARRY a borrow bit may be added in.
+    KnownBits Borrow(1);
+    if (Opcode == ISD::USUBO_CARRY || Opcode == ISD::SSUBO_CARRY) {
+      Borrow = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+      // Borrow has bit width 1
+      Borrow = Borrow.trunc(1);
+    } else {
+      Borrow.setAllZero();
+    }
 
     Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
     Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
-    Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false,
-                                        Known, Known2);
+    Known = KnownBits::computeForSubBorrow(Known, Known2, Borrow);
     break;
   }
   case ISD::UADDO:
@@ -3720,15 +3785,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     if (Opcode == ISD::ADDE)
       // Can't track carry from glue, set carry to unknown.
       Carry.resetAll();
-    else if (Opcode == ISD::UADDO_CARRY || Opcode == ISD::SADDO_CARRY)
-      // TODO: Compute known bits for the carry operand. Not sure if it is worth
-      // the trouble (how often will we find a known carry bit). And I haven't
-      // tested this very much yet, but something like this might work:
-      // Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
-      // Carry = Carry.zextOrTrunc(1, false);
-      Carry.resetAll();
-    else
+    else if (Opcode == ISD::UADDO_CARRY || Opcode == ISD::SADDO_CARRY) {
+      Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+      // Carry has bit width 1
+      Carry = Carry.trunc(1);
+    } else {
       Carry.setAllZero();
+    }
 
     Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
     Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -4047,8 +4110,11 @@ SelectionDAG::computeOverflowForSignedSub(SDValue N0, SDValue N1) const {
   if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1)
     return OFK_Never;
 
-  // TODO: Add ConstantRange::signedSubMayOverflow handling.
-  return OFK_Sometime;
+  KnownBits N0Known = computeKnownBits(N0);
+  KnownBits N1Known = computeKnownBits(N1);
+  ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, true);
+  ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, true);
+  return mapOverflowResult(N0Range.signedSubMayOverflow(N1Range));
 }
 
 SelectionDAG::OverflowKind
@@ -4057,7 +4123,53 @@ SelectionDAG::computeOverflowForUnsignedSub(SDValue N0, SDValue N1) const {
   if (isNullConstant(N1))
     return OFK_Never;
 
-  // TODO: Add ConstantRange::unsignedSubMayOverflow handling.
+  KnownBits N0Known = computeKnownBits(N0);
+  KnownBits N1Known = computeKnownBits(N1);
+  ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false);
+  ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false);
+  return mapOverflowResult(N0Range.unsignedSubMayOverflow(N1Range));
+}
+
+SelectionDAG::OverflowKind
+SelectionDAG::computeOverflowForUnsignedMul(SDValue N0, SDValue N1) const {
+  // X * 0 and X * 1 never overflow.
+  if (isNullConstant(N1) || isOneConstant(N1))
+    return OFK_Never;
+
+  KnownBits N0Known = computeKnownBits(N0);
+  KnownBits N1Known = computeKnownBits(N1);
+  ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false);
+  ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false);
+  return mapOverflowResult(N0Range.unsignedMulMayOverflow(N1Range));
+}
+
+SelectionDAG::OverflowKind
+SelectionDAG::computeOverflowForSignedMul(SDValue N0, SDValue N1) const {
+  // X * 0 and X * 1 never overflow.
+  if (isNullConstant(N1) || isOneConstant(N1))
+    return OFK_Never;
+
+  // Get the size of the result.
+  unsigned BitWidth = N0.getScalarValueSizeInBits();
+
+  // Sum of the sign bits.
+  unsigned SignBits = ComputeNumSignBits(N0) + ComputeNumSignBits(N1);
+
+  // If we have enough sign bits, then there's no overflow.
+  if (SignBits > BitWidth + 1)
+    return OFK_Never;
+
+  if (SignBits == BitWidth + 1) {
+    // The overflow occurs when the true multiplication of the
+    // the operands is the minimum negative number.
+    KnownBits N0Known = computeKnownBits(N0);
+    KnownBits N1Known = computeKnownBits(N1);
+    // If one of the operands is non-negative, then there's no
+    // overflow.
+    if (N0Known.isNonNegative() || N1Known.isNonNegative())
+      return OFK_Never;
+  }
   return OFK_Sometime;
 }
 
@@ -4069,8 +4181,10 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
   unsigned BitWidth = OpVT.getScalarSizeInBits();
 
   // Is the constant a known power of 2?
-  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val))
-    return Const->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
+  if (ISD::matchUnaryPredicate(Val, [BitWidth](ConstantSDNode *C) {
+        return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
+      }))
+    return true;
 
   // A left-shift of a constant one will have exactly one bit set because
   // shifting the bit off the end is undefined.
@@ -4078,6 +4192,8 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
     auto *C = isConstOrConstSplat(Val.getOperand(0));
     if (C && C->getAPIntValue() == 1)
       return true;
+    return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1) &&
+           isKnownNeverZero(Val, Depth);
   }
 
   // Similarly, a logical right-shift of a constant sign-bit will have exactly
@@ -4086,8 +4202,13 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
     auto *C = isConstOrConstSplat(Val.getOperand(0));
     if (C && C->getAPIntValue().isSignMask())
       return true;
+    return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1) &&
+           isKnownNeverZero(Val, Depth);
   }
 
+  if (Val.getOpcode() == ISD::ROTL || Val.getOpcode() == ISD::ROTR)
+    return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);
+
   // Are all operands of a build vector constant powers of two?
   if (Val.getOpcode() == ISD::BUILD_VECTOR)
     if (llvm::all_of(Val->ops(), [BitWidth](SDValue E) {
@@ -4109,6 +4230,34 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
       isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1))
     return true;
 
+  if (Val.getOpcode() == ISD::SMIN || Val.getOpcode() == ISD::SMAX ||
+      Val.getOpcode() == ISD::UMIN || Val.getOpcode() == ISD::UMAX)
+    return isKnownToBeAPowerOfTwo(Val.getOperand(1), Depth + 1) &&
+           isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);
+
+  if (Val.getOpcode() == ISD::SELECT || Val.getOpcode() == ISD::VSELECT)
+    return isKnownToBeAPowerOfTwo(Val.getOperand(2), Depth + 1) &&
+           isKnownToBeAPowerOfTwo(Val.getOperand(1), Depth + 1);
+
+  if (Val.getOpcode() == ISD::AND) {
+    // Looking for `x & -x` pattern:
+    // If x == 0:
+    //    x & -x -> 0
+    // If x != 0:
+    //    x & -x -> non-zero pow2
+    // so if we find the pattern return whether we know `x` is non-zero.
+    for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) {
+      SDValue NegOp = Val.getOperand(OpIdx);
+      if (NegOp.getOpcode() == ISD::SUB &&
+          NegOp.getOperand(1) == Val.getOperand(1 - OpIdx) &&
+          isNullOrNullSplat(NegOp.getOperand(0)))
+        return isKnownNeverZero(Val.getOperand(1 - OpIdx), Depth);
+    }
+  }
+
+  if (Val.getOpcode() == ISD::ZERO_EXTEND)
+    return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);
+
   // More could be done here, though the above checks are enough
   // to handle some common cases.
   return false;
@@ -4869,8 +5018,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
 
   unsigned Opcode = Op.getOpcode();
   switch (Opcode) {
-  case ISD::AssertSext:
-  case ISD::AssertZext:
   case ISD::FREEZE:
   case ISD::CONCAT_VECTORS:
   case ISD::INSERT_SUBVECTOR:
@@ -4886,7 +5033,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
   case ISD::BITREVERSE:
   case ISD::PARITY:
   case ISD::SIGN_EXTEND:
-  case ISD::ZERO_EXTEND:
   case ISD::TRUNCATE:
   case ISD::SIGN_EXTEND_INREG:
   case ISD::SIGN_EXTEND_VECTOR_INREG:
@@ -4896,6 +5042,10 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
   case ISD::BUILD_PAIR:
     return false;
 
+  // Matches hasPoisonGeneratingFlags().
+  case ISD::ZERO_EXTEND:
+    return ConsiderFlags && Op->getFlags().hasNonNeg();
+
   case ISD::ADD:
   case ISD::SUB:
   case ISD::MUL:
@@ -4932,6 +5082,15 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
     return true;
 }
 
+bool SelectionDAG::isADDLike(SDValue Op) const {
+  unsigned Opcode = Op.getOpcode();
+  if (Opcode == ISD::OR)
+    return haveNoCommonBitsSet(Op.getOperand(0), Op.getOperand(1));
+  if (Opcode == ISD::XOR)
+    return isMinSignedConstant(Op.getOperand(1));
+  return false;
+}
+
 bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
   if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
       !isa<ConstantSDNode>(Op.getOperand(1)))
@@ -4977,12 +5136,15 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
   case ISD::FCANONICALIZE:
   case ISD::FEXP:
   case ISD::FEXP2:
+  case ISD::FEXP10:
   case ISD::FTRUNC:
   case ISD::FFLOOR:
   case ISD::FCEIL:
   case ISD::FROUND:
   case ISD::FROUNDEVEN:
   case ISD::FRINT:
+  case ISD::LRINT:
+  case ISD::LLRINT:
   case ISD::FNEARBYINT:
   case ISD::FLDEXP: {
     if (SNaN)
@@ -5112,21 +5274,29 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
     return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
            isKnownNeverZero(Op.getOperand(2), Depth + 1);
 
-  case ISD::SHL:
+  case ISD::SHL: {
     if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap())
       return isKnownNeverZero(Op.getOperand(0), Depth + 1);
-
-    // 1 << X is never zero. TODO: This can be expanded if we can bound X.
-    // The expression is really !Known.One[BitWidth-MaxLog2(Known):0].isZero()
-    if (computeKnownBits(Op.getOperand(0), Depth + 1).One[0])
+    KnownBits ValKnown = computeKnownBits(Op.getOperand(0), Depth + 1);
+    // 1 << X is never zero.
+    if (ValKnown.One[0])
+      return true;
+    // If max shift cnt of known ones is non-zero, result is non-zero.
+    APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue();
+    if (MaxCnt.ult(ValKnown.getBitWidth()) &&
+        !ValKnown.One.shl(MaxCnt).isZero())
      return true;
     break;
-
+  }
   case ISD::UADDSAT:
   case ISD::UMAX:
     return isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
           isKnownNeverZero(Op.getOperand(0), Depth + 1);
 
+  // TODO for smin/smax: If either operand is known negative/positive
+  // respectively we don't need the other to be known at all.
+  case ISD::SMAX:
+  case ISD::SMIN:
   case ISD::UMIN:
     return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
           isKnownNeverZero(Op.getOperand(0), Depth + 1);
@@ -5140,16 +5310,19 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
     return isKnownNeverZero(Op.getOperand(0), Depth + 1);
 
   case ISD::SRA:
-  case ISD::SRL:
+  case ISD::SRL: {
    if (Op->getFlags().hasExact())
      return isKnownNeverZero(Op.getOperand(0), Depth + 1);
-
-    // Signed >> X is never zero. TODO: This can be expanded if we can bound X.
-    // The expression is really
-    // !Known.One[SignBit:SignBit-(BitWidth-MaxLog2(Known))].isZero()
-    if (computeKnownBits(Op.getOperand(0), Depth + 1).isNegative())
+    KnownBits ValKnown = computeKnownBits(Op.getOperand(0), Depth + 1);
+    if (ValKnown.isNegative())
+      return true;
+    // If max shift cnt of known ones is non-zero, result is non-zero.
+    APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue();
+    if (MaxCnt.ult(ValKnown.getBitWidth()) &&
+        !ValKnown.One.lshr(MaxCnt).isZero())
      return true;
    break;
-
+  }
   case ISD::UDIV:
   case ISD::SDIV:
     // div exact can only produce a zero if the dividend is zero.
@@ -5425,161 +5598,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                               SDValue N1, const SDNodeFlags Flags) {
   assert(N1.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!");
-  // Constant fold unary operations with an integer constant operand. Even
-  // opaque constant will be folded, because the folding of unary operations
-  // doesn't create new constants with different values. Nevertheless, the
-  // opaque flag is preserved during folding to prevent future folding with
-  // other constants.
-  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
-    const APInt &Val = C->getAPIntValue();
-    switch (Opcode) {
-    default: break;
-    case ISD::SIGN_EXTEND:
-      return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
-                         C->isTargetOpcode(), C->isOpaque());
-    case ISD::TRUNCATE:
-      if (C->isOpaque())
-        break;
-      [[fallthrough]];
-    case ISD::ZERO_EXTEND:
-      return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
-                         C->isTargetOpcode(), C->isOpaque());
-    case ISD::ANY_EXTEND:
-      // Some targets like RISCV prefer to sign extend some types.
-      if (TLI->isSExtCheaperThanZExt(N1.getValueType(), VT))
-        return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
-                           C->isTargetOpcode(), C->isOpaque());
-      return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
-                         C->isTargetOpcode(), C->isOpaque());
-    case ISD::UINT_TO_FP:
-    case ISD::SINT_TO_FP: {
-      APFloat apf(EVTToAPFloatSemantics(VT),
-                  APInt::getZero(VT.getSizeInBits()));
-      (void)apf.convertFromAPInt(Val,
-                                 Opcode==ISD::SINT_TO_FP,
-                                 APFloat::rmNearestTiesToEven);
-      return getConstantFP(apf, DL, VT);
-    }
-    case ISD::BITCAST:
-      if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
-        return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT);
-      if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
-        return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT);
-      if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
-        return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT);
-      if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
-        return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
-      break;
-    case ISD::ABS:
-      return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(),
-                         C->isOpaque());
-    case ISD::BITREVERSE:
-      return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(),
-                         C->isOpaque());
-    case ISD::BSWAP:
-      return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
-                         C->isOpaque());
-    case ISD::CTPOP:
-      return getConstant(Val.popcount(), DL, VT, C->isTargetOpcode(),
-                         C->isOpaque());
-    case ISD::CTLZ:
-    case ISD::CTLZ_ZERO_UNDEF:
-      return getConstant(Val.countl_zero(), DL, VT, C->isTargetOpcode(),
-                         C->isOpaque());
-    case ISD::CTTZ:
-    case ISD::CTTZ_ZERO_UNDEF:
-      return getConstant(Val.countr_zero(), DL, VT, C->isTargetOpcode(),
-                         C->isOpaque());
-    case ISD::FP16_TO_FP:
-    case ISD::BF16_TO_FP: {
-      bool Ignored;
-      APFloat FPV(Opcode == ISD::FP16_TO_FP ? APFloat::IEEEhalf()
-                                            : APFloat::BFloat(),
-                  (Val.getBitWidth() == 16) ? Val : Val.trunc(16));
-
-      // This can return overflow, underflow, or inexact; we don't care.
-      // FIXME need to be more flexible about rounding mode.
-      (void)FPV.convert(EVTToAPFloatSemantics(VT),
-                        APFloat::rmNearestTiesToEven, &Ignored);
-      return getConstantFP(FPV, DL, VT);
-    }
-    case ISD::STEP_VECTOR: {
-      if (SDValue V = FoldSTEP_VECTOR(DL, VT, N1, *this))
-        return V;
-      break;
-    }
-    }
-  }
-
-  // Constant fold unary operations with a floating point constant operand.
-  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N1)) {
-    APFloat V = C->getValueAPF(); // make copy
-    switch (Opcode) {
-    case ISD::FNEG:
-      V.changeSign();
-      return getConstantFP(V, DL, VT);
-    case ISD::FABS:
-      V.clearSign();
-      return getConstantFP(V, DL, VT);
-    case ISD::FCEIL: {
-      APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
-      if (fs == APFloat::opOK || fs == APFloat::opInexact)
-        return getConstantFP(V, DL, VT);
-      break;
-    }
-    case ISD::FTRUNC: {
-      APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
-      if (fs == APFloat::opOK || fs == APFloat::opInexact)
-        return getConstantFP(V, DL, VT);
-      break;
-    }
-    case ISD::FFLOOR: {
-      APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
-      if (fs == APFloat::opOK || fs == APFloat::opInexact)
-        return getConstantFP(V, DL, VT);
-      break;
-    }
-    case ISD::FP_EXTEND: {
-      bool ignored;
-      // This can return overflow, underflow, or inexact; we don't care.
-      // FIXME need to be more flexible about rounding mode.
-      (void)V.convert(EVTToAPFloatSemantics(VT),
-                      APFloat::rmNearestTiesToEven, &ignored);
-      return getConstantFP(V, DL, VT);
-    }
-    case ISD::FP_TO_SINT:
-    case ISD::FP_TO_UINT: {
-      bool ignored;
-      APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT);
-      // FIXME need to be more flexible about rounding mode.
-      APFloat::opStatus s =
-          V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored);
-      if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual
-        break;
-      return getConstant(IntVal, DL, VT);
-    }
-    case ISD::BITCAST:
-      if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
-        return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
-      if (VT == MVT::i16 && C->getValueType(0) == MVT::bf16)
-        return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
-      if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
-        return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
-      if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
-        return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
-      break;
-    case ISD::FP_TO_FP16:
-    case ISD::FP_TO_BF16: {
-      bool Ignored;
-      // This can return overflow, underflow, or inexact; we don't care.
-      // FIXME need to be more flexible about rounding mode.
-      (void)V.convert(Opcode == ISD::FP_TO_FP16 ? APFloat::IEEEhalf()
-                                                : APFloat::BFloat(),
-                      APFloat::rmNearestTiesToEven, &Ignored);
-      return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
-    }
-    }
-  }
 
   // Constant fold unary operations with a vector integer or float operand.
   switch (Opcode) {
@@ -5595,12 +5613,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   case ISD::FP_EXTEND:
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
+  case ISD::FP_TO_FP16:
+  case ISD::FP_TO_BF16:
   case ISD::TRUNCATE:
   case ISD::ANY_EXTEND:
   case ISD::ZERO_EXTEND:
   case ISD::SIGN_EXTEND:
   case ISD::UINT_TO_FP:
   case ISD::SINT_TO_FP:
+  case ISD::FP16_TO_FP:
+  case ISD::BF16_TO_FP:
+  case ISD::BITCAST:
   case ISD::ABS:
   case ISD::BITREVERSE:
   case ISD::BSWAP:
@@ -5608,7 +5631,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   case ISD::CTLZ_ZERO_UNDEF:
   case ISD::CTTZ:
   case ISD::CTTZ_ZERO_UNDEF:
-  case ISD::CTPOP: {
+  case ISD::CTPOP:
+  case ISD::STEP_VECTOR: {
     SDValue Ops = {N1};
     if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops))
       return Fold;
@@ -5697,6 +5721,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     if (OpOpcode == ISD::UNDEF)
       // zext(undef) = 0, because the top bits will be zero.
       return getConstant(0, DL, VT);
+
+    // Skip unnecessary zext_inreg pattern:
+    // (zext (trunc x)) -> x iff the upper bits are known zero.
+    // TODO: Remove (zext (trunc (and x, c))) exception which some targets
+    // use to recognise zext_inreg patterns.
+    if (OpOpcode == ISD::TRUNCATE) {
+      SDValue OpOp = N1.getOperand(0);
+      if (OpOp.getValueType() == VT) {
+        if (OpOp.getOpcode() != ISD::AND) {
+          APInt HiBits = APInt::getBitsSetFrom(VT.getScalarSizeInBits(),
+                                               N1.getScalarValueSizeInBits());
+          if (MaskedValueIsZero(OpOp, HiBits)) {
+            transferDbgValues(N1, OpOp);
+            return OpOp;
+          }
+        }
+      }
+    }
     break;
   case ISD::ANY_EXTEND:
     assert(VT.isInteger() && N1.getValueType().isInteger() &&
@@ -5853,7 +5895,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   SDNode *N;
   SDVTList VTs = getVTList(VT);
   SDValue Ops[] = {N1};
-  if (VT != MVT::Glue) { // Don't CSE flag producing nodes
+  if (VT != MVT::Glue) { // Don't CSE glue producing nodes
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTs, Ops);
     void *IP = nullptr;
@@ -6040,9 +6082,174 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
   if (isUndef(Opcode, Ops))
     return getUNDEF(VT);
 
+  // Handle unary special cases.
+  if (NumOps == 1) {
+    SDValue N1 = Ops[0];
+
+    // Constant fold unary operations with an integer constant operand. Even
+    // opaque constant will be folded, because the folding of unary operations
+    // doesn't create new constants with different values. Nevertheless, the
+    // opaque flag is preserved during folding to prevent future folding with
+    // other constants.
+    if (auto *C = dyn_cast<ConstantSDNode>(N1)) {
+      const APInt &Val = C->getAPIntValue();
+      switch (Opcode) {
+      case ISD::SIGN_EXTEND:
+        return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
+                           C->isTargetOpcode(), C->isOpaque());
+      case ISD::TRUNCATE:
+        if (C->isOpaque())
+          break;
+        [[fallthrough]];
+      case ISD::ZERO_EXTEND:
+        return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
+                           C->isTargetOpcode(), C->isOpaque());
+      case ISD::ANY_EXTEND:
+        // Some targets like RISCV prefer to sign extend some types.
+        if (TLI->isSExtCheaperThanZExt(N1.getValueType(), VT))
+          return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
+                             C->isTargetOpcode(), C->isOpaque());
+        return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
+                           C->isTargetOpcode(), C->isOpaque());
+      case ISD::ABS:
+        return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(),
+                           C->isOpaque());
+      case ISD::BITREVERSE:
+        return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(),
+                           C->isOpaque());
+      case ISD::BSWAP:
+        return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
+                           C->isOpaque());
+      case ISD::CTPOP:
+        return getConstant(Val.popcount(), DL, VT, C->isTargetOpcode(),
+                           C->isOpaque());
+      case ISD::CTLZ:
+      case ISD::CTLZ_ZERO_UNDEF:
+        return getConstant(Val.countl_zero(), DL, VT, C->isTargetOpcode(),
+                           C->isOpaque());
+      case ISD::CTTZ:
+      case ISD::CTTZ_ZERO_UNDEF:
+        return getConstant(Val.countr_zero(), DL, VT, C->isTargetOpcode(),
+                           C->isOpaque());
+      case ISD::UINT_TO_FP:
+      case ISD::SINT_TO_FP: {
+        APFloat apf(EVTToAPFloatSemantics(VT),
+                    APInt::getZero(VT.getSizeInBits()));
+        (void)apf.convertFromAPInt(Val, Opcode == ISD::SINT_TO_FP,
+                                   APFloat::rmNearestTiesToEven);
+        return getConstantFP(apf, DL, VT);
+      }
+      case ISD::FP16_TO_FP:
+      case ISD::BF16_TO_FP: {
+        bool Ignored;
+        APFloat FPV(Opcode == ISD::FP16_TO_FP ? APFloat::IEEEhalf()
+                                              : APFloat::BFloat(),
+                    (Val.getBitWidth() == 16) ? Val : Val.trunc(16));
+
+        // This can return overflow, underflow, or inexact; we don't care.
+        // FIXME need to be more flexible about rounding mode.
+        (void)FPV.convert(EVTToAPFloatSemantics(VT),
+                          APFloat::rmNearestTiesToEven, &Ignored);
+        return getConstantFP(FPV, DL, VT);
+      }
+      case ISD::STEP_VECTOR:
+        if (SDValue V = FoldSTEP_VECTOR(DL, VT, N1, *this))
+          return V;
+        break;
+      case ISD::BITCAST:
+        if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
+          return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT);
+        if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+          return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT);
+        if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+          return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT);
+        if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
+          return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
+        break;
+      }
+    }
+
+    // Constant fold unary operations with a floating point constant operand.
+    if (auto *C = dyn_cast<ConstantFPSDNode>(N1)) {
+      APFloat V = C->getValueAPF(); // make copy
+      switch (Opcode) {
+      case ISD::FNEG:
+        V.changeSign();
+        return getConstantFP(V, DL, VT);
+      case ISD::FABS:
+        V.clearSign();
+        return getConstantFP(V, DL, VT);
+      case ISD::FCEIL: {
+        APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
+        if (fs == APFloat::opOK || fs == APFloat::opInexact)
+          return getConstantFP(V, DL, VT);
+        return SDValue();
+      }
+      case ISD::FTRUNC: {
+        APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
+        if (fs == APFloat::opOK || fs == APFloat::opInexact)
+          return getConstantFP(V, DL, VT);
+        return SDValue();
+      }
+      case ISD::FFLOOR: {
+        APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
+        if (fs == APFloat::opOK || fs == APFloat::opInexact)
+          return getConstantFP(V, DL, VT);
+        return SDValue();
+      }
+      case ISD::FP_EXTEND: {
+        bool ignored;
+        // This can return overflow, underflow, or inexact; we don't care.
+        // FIXME need to be more flexible about rounding mode.
+        (void)V.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+                        &ignored);
+        return getConstantFP(V, DL, VT);
+      }
+      case ISD::FP_TO_SINT:
+      case ISD::FP_TO_UINT: {
+        bool ignored;
+        APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT);
+        // FIXME need to be more flexible about rounding mode.
+        APFloat::opStatus s =
+            V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored);
+        if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual
+          break;
+        return getConstant(IntVal, DL, VT);
+      }
+      case ISD::FP_TO_FP16:
+      case ISD::FP_TO_BF16: {
+        bool Ignored;
+        // This can return overflow, underflow, or inexact; we don't care.
+        // FIXME need to be more flexible about rounding mode.
+        (void)V.convert(Opcode == ISD::FP_TO_FP16 ? APFloat::IEEEhalf()
+                                                  : APFloat::BFloat(),
+                        APFloat::rmNearestTiesToEven, &Ignored);
+        return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
+      }
+      case ISD::BITCAST:
+        if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
+          return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL,
+                             VT);
+        if (VT == MVT::i16 && C->getValueType(0) == MVT::bf16)
+          return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL,
+                             VT);
+        if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+          return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL,
+                             VT);
+        if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+          return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
+        break;
+      }
+    }
+
+    // Early-out if we failed to constant fold a bitcast.
+    if (Opcode == ISD::BITCAST)
+      return SDValue();
+  }
+
   // Handle binops special cases.
   if (NumOps == 2) {
-    if (SDValue CFP = foldConstantFPMath(Opcode, DL, VT, Ops[0], Ops[1]))
+    if (SDValue CFP = foldConstantFPMath(Opcode, DL, VT, Ops))
       return CFP;
 
     if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) {
@@ -6235,11 +6442,17 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
 }
 
 SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
-                                         EVT VT, SDValue N1, SDValue N2) {
+                                         EVT VT, ArrayRef<SDValue> Ops) {
+  // TODO: Add support for unary/ternary fp opcodes.
+  if (Ops.size() != 2)
+    return SDValue();
+
   // TODO: We don't do any constant folding for strict FP opcodes here, but we
   //       should. That will require dealing with a potentially non-default
   //       rounding mode, checking the "opStatus" return value from the APFloat
   //       math calculations, and possibly other variations.
+  SDValue N1 = Ops[0];
+  SDValue N2 = Ops[1];
   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, /*AllowUndefs*/ false);
   ConstantFPSDNode *N2CFP = isConstOrConstSplatFP(N2, /*AllowUndefs*/ false);
   if (N1CFP && N2CFP) {
@@ -6600,6 +6813,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       }
       return getBuildVector(VT, DL, Ops);
     }
+
+    if (N1.getOpcode() == ISD::SPLAT_VECTOR &&
+        isa<ConstantSDNode>(N1.getOperand(0)))
+      return getNode(
+          ISD::SPLAT_VECTOR, DL, VT,
+          SignExtendInReg(N1.getConstantOperandAPInt(0),
+                          N1.getOperand(0).getValueType()));
     break;
   }
   case ISD::FP_TO_SINT_SAT:
@@ -6868,7 +7088,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
          "Operand is DELETED_NODE!");
   // Perform various simplifications.
   switch (Opcode) {
-  case ISD::FMA: {
+  case ISD::FMA:
+  case ISD::FMAD: {
     assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
     assert(N1.getValueType() == VT && N2.getValueType() == VT &&
            N3.getValueType() == VT && "FMA types must match!");
@@ -6879,7 +7100,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       APFloat V1 = N1CFP->getValueAPF();
       const APFloat &V2 = N2CFP->getValueAPF();
       const APFloat &V3 = N3CFP->getValueAPF();
-      V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
+      if (Opcode == ISD::FMAD) {
+        V1.multiply(V2, APFloat::rmNearestTiesToEven);
+        V1.add(V3, APFloat::rmNearestTiesToEven);
+      } else
+        V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
       return getConstantFP(V1, DL, VT);
     }
     break;
@@ -7001,7 +7226,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     break;
   }
 
-  // Memoize node if it doesn't produce a flag.
+  // Memoize node if it doesn't produce a glue result.
   SDNode *N;
   SDVTList VTs = getVTList(VT);
   SDValue Ops[] = {N1, N2, N3};
@@ -7342,7 +7567,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
     if (Value.getNode()) {
       Store = DAG.getStore(
           Chain, dl, Value,
-          DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+          DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
          DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo);
       OutChains.push_back(Store);
     }
@@ -7367,14 +7592,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
 
     Value = DAG.getExtLoad(
         ISD::EXTLOAD, dl, NVT, Chain,
-        DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
+        DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl),
        SrcPtrInfo.getWithOffset(SrcOff), VT,
        commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo);
     OutLoadChains.push_back(Value.getValue(1));
 
     Store = DAG.getTruncStore(
         Chain, dl, Value,
-        DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+        DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
        DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, NewAAInfo);
     OutStoreChains.push_back(Store);
@@ -7511,7 +7736,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
 
     Value = DAG.getLoad(
         VT, dl, Chain,
-        DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
+        DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl),
        SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags, NewAAInfo);
     LoadValues.push_back(Value);
     LoadChains.push_back(Value.getValue(1));
@@ -7526,7 +7751,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
 
     Store = DAG.getStore(
         Chain, dl, LoadValues[i],
-        DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+        DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
        DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo);
     OutChains.push_back(Store);
     DstOff += VTSize;
@@ -7631,19 +7856,34 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
     }
 
     // If this store is smaller than the largest store see whether we can get
-    // the smaller value for free with a truncate.
+    // the smaller value for free with a truncate or extract vector element and
+    // then store.
     SDValue Value = MemSetValue;
     if (VT.bitsLT(LargestVT)) {
+      unsigned Index;
+      unsigned NElts = LargestVT.getSizeInBits() / VT.getSizeInBits();
+      EVT SVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), NElts);
       if (!LargestVT.isVector() && !VT.isVector() &&
           TLI.isTruncateFree(LargestVT, VT))
         Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
-      else
+      else if (LargestVT.isVector() && !VT.isVector() &&
+               TLI.shallExtractConstSplatVectorElementToStore(
+                   LargestVT.getTypeForEVT(*DAG.getContext()),
+                   VT.getSizeInBits(), Index) &&
+               TLI.isTypeLegal(SVT) &&
+               LargestVT.getSizeInBits() == SVT.getSizeInBits()) {
+        // Target which can combine store(extractelement VectorTy, Idx) can get
+        // the smaller value for free.
+        SDValue TailValue = DAG.getNode(ISD::BITCAST, dl, SVT, MemSetValue);
+        Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, TailValue,
+                            DAG.getVectorIdxConstant(Index, dl));
+      } else
        Value = getMemsetValue(Src, VT, DAG, dl);
     }
     assert(Value.getValueType() == VT && "Value with wrong type.");
     SDValue Store = DAG.getStore(
         Chain, dl, Value,
-        DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+        DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
        DstPtrInfo.getWithOffset(DstOff), Alignment,
        isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone,
        NewAAInfo);
@@ -7717,7 +7957,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
 
   // Emit a library call.
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
-  Entry.Ty = Type::getInt8PtrTy(*getContext());
+  Entry.Ty = PointerType::getUnqual(*getContext());
   Entry.Node = Dst; Args.push_back(Entry);
   Entry.Node = Src; Args.push_back(Entry);
@@ -7819,7 +8059,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
 
   // Emit a library call.
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
-  Entry.Ty = Type::getInt8PtrTy(*getContext());
+  Entry.Ty = PointerType::getUnqual(*getContext());
   Entry.Node = Dst; Args.push_back(Entry);
   Entry.Node = Src; Args.push_back(Entry);
@@ -7933,8 +8173,6 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
   // FIXME: pass in SDLoc
   CLI.setDebugLoc(dl).setChain(Chain);
 
-  ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src);
-  const bool SrcIsZero = ConstantSrc && ConstantSrc->isZero();
   const char *BzeroName = getTargetLoweringInfo().getLibcallName(RTLIB::BZERO);
 
   // Helper function to create an Entry from Node and Type.
@@ -7946,16 +8184,16 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
   };
 
   // If zeroing out and bzero is present, use it.
-  if (SrcIsZero && BzeroName) {
+  if (isNullConstant(Src) && BzeroName) {
     TargetLowering::ArgListTy Args;
-    Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx)));
+    Args.push_back(CreateEntry(Dst, PointerType::getUnqual(Ctx)));
     Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx)));
     CLI.setLibCallee(
         TLI->getLibcallCallingConv(RTLIB::BZERO), Type::getVoidTy(Ctx),
         getExternalSymbol(BzeroName, TLI->getPointerTy(DL)), std::move(Args));
   } else {
     TargetLowering::ArgListTy Args;
-    Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx)));
+    Args.push_back(CreateEntry(Dst, PointerType::getUnqual(Ctx)));
     Args.push_back(CreateEntry(Src, Src.getValueType().getTypeForEVT(Ctx)));
     Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx)));
     CLI.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
@@ -8127,7 +8365,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
           (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
          "Opcode is not a memory-accessing opcode!");
 
-  // Memoize the node unless it returns a flag.
+  // Memoize the node unless it returns a glue result.
   MemIntrinsicSDNode *N;
   if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
     FoldingSetNodeID ID;
@@ -9645,6 +9883,27 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
       SDValue ZeroOverFlow = getConstant(0, DL, VTList.VTs[1]);
       return getNode(ISD::MERGE_VALUES, DL, VTList, {N1, ZeroOverFlow}, Flags);
     }
+
+    if (VTList.VTs[0].isVector() &&
+        VTList.VTs[0].getVectorElementType() == MVT::i1 &&
+        VTList.VTs[1].getVectorElementType() == MVT::i1) {
+      SDValue F1 = getFreeze(N1);
+      SDValue F2 = getFreeze(N2);
+      // {vXi1,vXi1} (u/s)addo(vXi1 x, vXi1y) -> {xor(x,y),and(x,y)}
+      if (Opcode == ISD::UADDO || Opcode == ISD::SADDO)
+        return getNode(ISD::MERGE_VALUES, DL, VTList,
+                       {getNode(ISD::XOR, DL, VTList.VTs[0], F1, F2),
+                        getNode(ISD::AND, DL, VTList.VTs[1], F1, F2)},
+                       Flags);
+      // {vXi1,vXi1} (u/s)subo(vXi1 x, vXi1y) -> {xor(x,y),and(~x,y)}
+      if (Opcode == ISD::USUBO || Opcode == ISD::SSUBO) {
+        SDValue NotF1 = getNOT(DL, F1, VTList.VTs[0]);
+        return getNode(ISD::MERGE_VALUES, DL, VTList,
+                       {getNode(ISD::XOR, DL, VTList.VTs[0], F1, F2),
+                        getNode(ISD::AND, DL, VTList.VTs[1], NotF1, F2)},
+                       Flags);
+      }
+    }
     break;
   }
   case ISD::SMUL_LOHI:
@@ -9654,6 +9913,28 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
            VTList.VTs[0] == Ops[0].getValueType() &&
            VTList.VTs[0] == Ops[1].getValueType() &&
            "Binary operator types must match!");
+    // Constant fold.
+    ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(Ops[0]);
+    ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ops[1]);
+    if (LHS && RHS) {
+      unsigned Width = VTList.VTs[0].getScalarSizeInBits();
+      unsigned OutWidth = Width * 2;
+      APInt Val = LHS->getAPIntValue();
+      APInt Mul = RHS->getAPIntValue();
+      if (Opcode == ISD::SMUL_LOHI) {
+        Val = Val.sext(OutWidth);
+        Mul = Mul.sext(OutWidth);
+      } else {
+        Val = Val.zext(OutWidth);
+        Mul = Mul.zext(OutWidth);
+      }
+      Val *= Mul;
+
+      SDValue Hi =
+          getConstant(Val.extractBits(Width, Width), DL, VTList.VTs[0]);
+      SDValue Lo = getConstant(Val.trunc(Width), DL, VTList.VTs[0]);
+      return getNode(ISD::MERGE_VALUES, DL, VTList, {Lo, Hi}, Flags);
+    }
     break;
   }
   case ISD::FFREXP: {
@@ -9727,7 +10008,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
 #endif
   }
 
-  // Memoize the node unless it returns a flag.
+  // Memoize the node unless it returns a glue result.
   SDNode *N;
   if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
     FoldingSetNodeID ID;
@@ -10100,7 +10381,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
 /// For IROrder, we keep the smaller of the two
 SDNode *SelectionDAG::UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &OLoc) {
   DebugLoc NLoc = N->getDebugLoc();
-  if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) {
+  if (NLoc && OptLevel == CodeGenOptLevel::None && OLoc.getDebugLoc() != NLoc) {
     N->setDebugLoc(DebugLoc());
   }
   unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder());
@@ -10569,11 +10850,18 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
     switch (N.getOpcode()) {
     default:
       break;
-    case ISD::ADD:
+    case ISD::ADD: {
       SDValue N0 = N.getOperand(0);
       SDValue N1 = N.getOperand(1);
-      if (!isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1)) {
-        uint64_t Offset = N.getConstantOperandVal(1);
+      if (!isa<ConstantSDNode>(N0)) {
+        bool RHSConstant = isa<ConstantSDNode>(N1);
+        uint64_t Offset;
+        if (RHSConstant)
+          Offset = N.getConstantOperandVal(1);
+        // We are not allowed to turn indirect debug values variadic, so
+        // don't salvage those.
+        if (!RHSConstant && DV->isIndirect())
+          continue;
 
         // Rewrite an ADD constant node into a DIExpression. Since we are
         // performing arithmetic to compute the variable's *value* in the
@@ -10582,7 +10870,8 @@
         auto *DIExpr = DV->getExpression();
         auto NewLocOps = DV->copyLocationOps();
         bool Changed = false;
-        for (size_t i = 0; i < NewLocOps.size(); ++i) {
+        size_t OrigLocOpsSize = NewLocOps.size();
+        for (size_t i = 0; i < OrigLocOpsSize; ++i) {
          // We're not given a ResNo to compare against because the whole
          // node is going away. We know that any ISD::ADD only has one
          // result, so we can assume any node match is using the result.
@@ -10590,19 +10879,37 @@
              NewLocOps[i].getSDNode() != &N)
            continue;
          NewLocOps[i] = SDDbgOperand::fromNode(N0.getNode(), N0.getResNo());
-          SmallVector<uint64_t, 3> ExprOps;
-          DIExpression::appendOffset(ExprOps, Offset);
-          DIExpr = DIExpression::appendOpsToArg(DIExpr, ExprOps, i, true);
+          if (RHSConstant) {
+            SmallVector<uint64_t, 3> ExprOps;
+            DIExpression::appendOffset(ExprOps, Offset);
+            DIExpr = DIExpression::appendOpsToArg(DIExpr, ExprOps, i, true);
+          } else {
+            // Convert to a variadic expression (if not already).
+            // convertToVariadicExpression() returns a const pointer, so we use
+            // a temporary const variable here.
+            const auto *TmpDIExpr =
+                DIExpression::convertToVariadicExpression(DIExpr);
+            SmallVector<uint64_t, 3> ExprOps;
+            ExprOps.push_back(dwarf::DW_OP_LLVM_arg);
+            ExprOps.push_back(NewLocOps.size());
+            ExprOps.push_back(dwarf::DW_OP_plus);
+            SDDbgOperand RHS =
+                SDDbgOperand::fromNode(N1.getNode(), N1.getResNo());
+            NewLocOps.push_back(RHS);
+            DIExpr = DIExpression::appendOpsToArg(TmpDIExpr, ExprOps, i, true);
+          }
          Changed = true;
        }
        (void)Changed;
        assert(Changed && "Salvage target doesn't use N");
 
+        bool IsVariadic =
+            DV->isVariadic() || OrigLocOpsSize != NewLocOps.size();
+
        auto AdditionalDependencies = DV->getAdditionalDependencies();
-        SDDbgValue *Clone = getDbgValueList(DV->getVariable(), DIExpr,
-                                            NewLocOps, AdditionalDependencies,
-                                            DV->isIndirect(), DV->getDebugLoc(),
-                                            DV->getOrder(), DV->isVariadic());
+        SDDbgValue *Clone = getDbgValueList(
+            DV->getVariable(), DIExpr, NewLocOps, AdditionalDependencies,
+            DV->isIndirect(), DV->getDebugLoc(), DV->getOrder(), IsVariadic);
        ClonedDVs.push_back(Clone);
        DV->setIsInvalidated();
        DV->setIsEmitted();
@@ -10610,6 +10917,41 @@
                   N0.getNode()->dumprFull(this);
                   dbgs() << " into " << *DIExpr << '\n');
      }
+      break;
+    }
+    case ISD::TRUNCATE: {
+      SDValue N0 = N.getOperand(0);
+      TypeSize FromSize = N0.getValueSizeInBits();
+      TypeSize ToSize = N.getValueSizeInBits(0);
+
+      DIExpression *DbgExpression = DV->getExpression();
+      auto ExtOps = DIExpression::getExtOps(FromSize, ToSize, false);
+      auto NewLocOps = DV->copyLocationOps();
+      bool Changed = false;
+      for (size_t i = 0; i < NewLocOps.size(); ++i) {
+        if (NewLocOps[i].getKind() != SDDbgOperand::SDNODE ||
+            NewLocOps[i].getSDNode() != &N)
+          continue;
+
+        NewLocOps[i] = SDDbgOperand::fromNode(N0.getNode(), N0.getResNo());
+        DbgExpression = DIExpression::appendOpsToArg(DbgExpression, ExtOps, i);
+        Changed = true;
+      }
+      assert(Changed && "Salvage target doesn't use N");
+      (void)Changed;
+
+      SDDbgValue *Clone =
+          getDbgValueList(DV->getVariable(), DbgExpression, NewLocOps,
+                          DV->getAdditionalDependencies(), DV->isIndirect(),
+                          DV->getDebugLoc(), DV->getOrder(), DV->isVariadic());
+
+      ClonedDVs.push_back(Clone);
+      DV->setIsInvalidated();
+      DV->setIsEmitted();
+      LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this);
+                 dbgs() << " into " << *DbgExpression << '\n');
+      break;
+    }
    }
  }
@@ -12113,6 +12455,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
 
   // FIXME: This does not work for vectors with elements less than 8 bits.
   while (VecWidth > 8) {
+    // If we can't split in half, stop here.
+    if (VecWidth & 1)
+      break;
+
     unsigned HalfSize = VecWidth / 2;
     APInt HighValue = SplatValue.extractBits(HalfSize, HalfSize);
     APInt LowValue = SplatValue.extractBits(HalfSize, 0);
@@ -12130,6 +12476,12 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
     VecWidth = HalfSize;
   }
 
+  // FIXME: The loop above only tries to split in halves. But if the input
+  // vector for example is <3 x i16> it wouldn't be able to detect a
+  // SplatBitSize of 16. No idea if that is a design flaw currently limiting
+  // optimizations. I guess that back in the days when this helper was created
+  // vectors normally was power-of-2 sized.
+
   SplatBitSize = VecWidth;
   return true;
 }
