Diffstat (limited to 'lib/CodeGen/SelectionDAG')
28 files changed, 5146 insertions, 3863 deletions
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index fbedf2c1d17a3..b3d26c41acf7c 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -14,15 +14,15 @@ add_llvm_library(LLVMSelectionDAG
   ScheduleDAGFast.cpp
   ScheduleDAGRRList.cpp
   ScheduleDAGSDNodes.cpp
-  SelectionDAG.cpp
+  ScheduleDAGVLIW.cpp
   SelectionDAGBuilder.cpp
+  SelectionDAG.cpp
   SelectionDAGDumper.cpp
   SelectionDAGISel.cpp
   SelectionDAGPrinter.cpp
+  SelectionDAGTargetInfo.cpp
   StatepointLowering.cpp
-  ScheduleDAGVLIW.cpp
   TargetLowering.cpp
-  TargetSelectionDAGInfo.cpp
   )
 add_dependencies(LLVMSelectionDAG intrinsics_gen)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c741982bc08db..d888676583f38 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
@@ -112,7 +113,7 @@ namespace {
   ///
   /// This is used to allow us to reliably add any operands of a DAG node
   /// which have not yet been combined to the worklist.
-  SmallPtrSet<SDNode *, 64> CombinedNodes;
+  SmallPtrSet<SDNode *, 32> CombinedNodes;
   // AA - Used for DAG load/store alias analysis.
   AliasAnalysis &AA;
@@ -211,8 +212,8 @@ namespace {
   SDValue PromoteExtend(SDValue Op);
   bool PromoteLoad(SDValue Op);
-  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
-                       SDValue Trunc, SDValue ExtLoad, SDLoc DL,
+  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
+                       SDValue ExtLoad, const SDLoc &DL,
                        ISD::NodeType ExtType);
   /// Call the node-specific routine that knows how to fold each
@@ -258,6 +259,7 @@
   SDValue visitSRL(SDNode *N);
   SDValue visitRotate(SDNode *N);
   SDValue visitBSWAP(SDNode *N);
+  SDValue visitBITREVERSE(SDNode *N);
   SDValue visitCTLZ(SDNode *N);
   SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
   SDValue visitCTTZ(SDNode *N);
@@ -273,6 +275,7 @@
   SDValue visitANY_EXTEND(SDNode *N);
   SDValue visitSIGN_EXTEND_INREG(SDNode *N);
   SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
+  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
   SDValue visitTRUNCATE(SDNode *N);
   SDValue visitBITCAST(SDNode *N);
   SDValue visitBUILD_PAIR(SDNode *N);
@@ -326,18 +329,19 @@
   SDValue visitFMULForFMACombine(SDNode *N);
   SDValue XformToShuffleWithZero(SDNode *N);
-  SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
+  SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
+                         SDValue RHS);
   SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
   bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
   SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
-  SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2);
-  SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2,
-                           SDValue N3, ISD::CondCode CC,
+  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
+  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
+                           SDValue N2, SDValue N3, ISD::CondCode CC,
                            bool NotExtCompare = false);
   SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
-                        SDLoc DL, bool foldBooleans = true);
+                        const SDLoc &DL, bool foldBooleans = true);
   bool isSetCCEquivalent(SDValue N,
SDValue &LHS, SDValue &RHS, SDValue &CC) const; @@ -353,19 +357,21 @@ namespace { SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags); - SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags); - SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations, - SDNodeFlags *Flags); - SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations, - SDNodeFlags *Flags); + SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags); + SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags); + SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, bool Recip); + SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations, + SDNodeFlags *Flags, bool Reciprocal); + SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations, + SDNodeFlags *Flags, bool Reciprocal); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, unsigned NegOpcode, - SDLoc DL); - SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL); + const SDLoc &DL); + SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); SDValue TransformFPLoadStorePair(SDNode *N); @@ -386,10 +392,17 @@ namespace { /// chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); - /// Do FindBetterChain for a store and any possibly adjacent stores on - /// consecutive chains. + /// Try to replace a store and any possibly adjacent stores on + /// consecutive chains with better chains. Return true only if St is + /// replaced. + /// + /// Notice that other chains may still be replaced even if the function + /// returns false. bool findBetterNeighborChains(StoreSDNode *St); + /// Match "(X shl/srl V1) & V2" where V2 may not be present. + bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask); + /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. struct MemOpLink { @@ -414,8 +427,7 @@ namespace { /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a /// constant build_vector of the stored constant values in Stores. - SDValue getMergedConstantVectorStore(SelectionDAG &DAG, - SDLoc SL, + SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const SDLoc &SL, ArrayRef<MemOpLink> Stores, SmallVectorImpl<SDValue> &Chains, EVT Ty) const; @@ -444,6 +456,12 @@ namespace { StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes, SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes); + /// Helper function for MergeConsecutiveStores. Checks if + /// Candidate stores have indirect dependency through their + /// operands. \return True if safe to merge + bool checkMergeStoreCandidatesForDependencies( + SmallVectorImpl<MemOpLink> &StoreNodes); + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return True if some memory operations were changed. @@ -747,32 +765,6 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const { return false; } -/// Returns true if N is a BUILD_VECTOR node whose -/// elements are all the same constant or undefined. 
-static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { - BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); - if (!C) - return false; - - APInt SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - EVT EltVT = N->getValueType(0).getVectorElementType(); - return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, - HasAnyUndefs) && - EltVT.getSizeInBits() >= SplatBitSize); -} - -// \brief Returns the SDNode if it is a constant integer BuildVector -// or constant integer. -static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) { - if (isa<ConstantSDNode>(N)) - return N.getNode(); - if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) - return N.getNode(); - return nullptr; -} - // \brief Returns the SDNode if it is a constant float BuildVector // or constant float. static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { @@ -821,12 +813,12 @@ static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) { return nullptr; } -SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, - SDValue N0, SDValue N1) { +SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, + SDValue N1) { EVT VT = N0.getValueType(); if (N0.getOpcode() == Opc) { - if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { - if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) { + if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { + if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) { // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R)) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); @@ -845,17 +837,17 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, } if (N1.getOpcode() == Opc) { - if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { - if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) { + if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { + if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) { // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L)) return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); return SDValue(); } if (N1.hasOneUse()) { - // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one + // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one // use - SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0); + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0)); if (!OpNode.getNode()) return SDValue(); AddToWorklist(OpNode.getNode()); @@ -962,7 +954,8 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { Replace = false; SDLoc dl(Op); - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { + if (ISD::isUNINDEXEDLoad(Op.getNode())) { + LoadSDNode *LD = cast<LoadSDNode>(Op); EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD @@ -1166,6 +1159,9 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { if (!LegalOperations) return false; + if (!ISD::isUNINDEXEDLoad(Op.getNode())) + return false; + EVT VT = Op.getValueType(); if (VT.isVector() || !VT.isInteger()) return false; @@ -1259,8 +1255,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // changes of the root. 
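The ReassociateOps folds above hoist both constants of a reassociable chain together so they can be folded at compile time: (op (op x, c1), c2) -> (op x, (op c1, c2)). A standalone sketch of the underlying identity (plain C++ with illustrative values, not LLVM code):

    // Reassociation is valid for associative, commutative ops such as ADD
    // and MUL; unsigned arithmetic keeps the identity exact under wraparound.
    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0xDEADBEEF, c1 = 17, c2 = 25;
      assert(((x + c1) + c2) == (x + (c1 + c2))); // (add (add x, c1), c2)
      assert(((x * c1) * c2) == (x * (c1 * c2))); // (mul (mul x, c1), c2)
      return 0;
    }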
HandleSDNode Dummy(DAG.getRoot()); - // while the worklist isn't empty, find a node and - // try and combine it. + // While the worklist isn't empty, find a node and try to combine it. while (!WorklistMap.empty()) { SDNode *N; // The Worklist holds the SDNodes in order, but it may contain null entries. @@ -1326,8 +1321,6 @@ void DAGCombiner::Run(CombineLevel AtLevel) { DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG)); - // Transfer debug value. - DAG.TransferDbgValues(SDValue(N, 0), RV); if (N->getNumValues() == RV.getNode()->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode()); else { @@ -1388,6 +1381,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ROTR: case ISD::ROTL: return visitRotate(N); case ISD::BSWAP: return visitBSWAP(N); + case ISD::BITREVERSE: return visitBITREVERSE(N); case ISD::CTLZ: return visitCTLZ(N); case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); case ISD::CTTZ: return visitCTTZ(N); @@ -1403,6 +1397,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ANY_EXTEND: return visitANY_EXTEND(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); + case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N); case ISD::TRUNCATE: return visitTRUNCATE(N); case ISD::BITCAST: return visitBITCAST(N); case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); @@ -1628,8 +1623,8 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } -/// If \p N is a ContantSDNode with isOpaque() == false return it casted to a -/// ContantSDNode pointer else nullptr. +/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a +/// ConstantSDNode pointer else nullptr. static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N); return Const != nullptr && !Const->isOpaque() ? 
Const : nullptr; @@ -1653,38 +1648,32 @@ SDValue DAGCombiner::visitADD(SDNode *N) { } // fold (add x, undef) -> undef - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return N0; - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) return N1; - // fold (add c1, c2) -> c1+c2 - ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); - ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C); - // canonicalize constant to RHS - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { + // canonicalize constant to RHS + if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); + // fold (add c1, c2) -> c1+c2 + return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, + N0.getNode(), N1.getNode()); + } // fold (add x, 0) -> x if (isNullConstant(N1)) return N0; - // fold (add Sym, c) -> Sym+c - if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) - if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && - GA->getOpcode() == ISD::GlobalAddress) - return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, - GA->getOffset() + - (uint64_t)N1C->getSExtValue()); // fold ((c1-A)+c2) -> (c1+c2)-A - if (N1C && N0.getOpcode() == ISD::SUB) - if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { - SDLoc DL(N); - return DAG.getNode(ISD::SUB, DL, VT, - DAG.getConstant(N1C->getAPIntValue()+ - N0C->getAPIntValue(), DL, VT), - N0.getOperand(1)); - } + if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) { + if (N0.getOpcode() == ISD::SUB) + if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(N1C->getAPIntValue()+ + N0C->getAPIntValue(), DL, VT), + N0.getOperand(1)); + } + } // reassociate add if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1)) return RADD; @@ -1850,9 +1839,9 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { // Since it may not be valid to emit a fold to zero for vector initializers // check if we can before folding. -static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, - SelectionDAG &DAG, - bool LegalOperations, bool LegalTypes) { +static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, + SelectionDAG &DAG, bool LegalOperations, + bool LegalTypes) { if (!VT.isVector()) return DAG.getConstant(0, DL, VT); if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) @@ -1879,11 +1868,14 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // FIXME: Refactor this and xor and other similar operations together. 
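visitADD's ((c1 - A) + c2) -> ((c1 + c2) - A) rewrite above is sound because DAG integer arithmetic is two's-complement and therefore modular; a minimal standalone check (plain C++, arbitrary values, not LLVM code):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t A = 0x12345678, c1 = 1000, c2 = 0xFFFFFF00;
      // fold ((c1-A)+c2) -> (c1+c2)-A, exact even when intermediates wrap
      assert(((c1 - A) + c2) == ((c1 + c2) - A));
      return 0;
    }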
if (N0 == N1) return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); - // fold (sub c1, c2) -> c1-c2 + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + DAG.isConstantIntBuildVectorOrConstantInt(N1)) { + // fold (sub c1, c2) -> c1-c2 + return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, + N0.getNode(), N1.getNode()); + } ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C); // fold (sub x, c) -> (add x, -c) if (N1C) { SDLoc DL(N); @@ -1933,9 +1925,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { N0.getOperand(0), N0.getOperand(1).getOperand(0)); // If either operand of a sub is undef, the result is undef - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return N0; - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) return N1; // If the relocation model supports it, consider symbol offsets. @@ -2013,7 +2005,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { EVT VT = N0.getValueType(); // fold (mul x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); bool N0IsConst = false; @@ -2026,8 +2018,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; - N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); - N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); + N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0); + N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); } else { N0IsConst = isa<ConstantSDNode>(N0); if (N0IsConst) { @@ -2047,8 +2039,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N0.getNode(), N1.getNode()); // canonicalize constant to RHS (vector doesn't have to splat) - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1 == 0) @@ -2091,23 +2083,21 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { APInt Val; // (mul (shl X, c1), c2) -> (mul X, c2 << c1) if (N1IsConst && N0.getOpcode() == ISD::SHL && - (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || - isa<ConstantSDNode>(N0.getOperand(1)))) { - SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, - N1, N0.getOperand(1)); + (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1)))) { + SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); AddToWorklist(C3.getNode()); - return DAG.getNode(ISD::MUL, SDLoc(N), VT, - N0.getOperand(0), C3); + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); } // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one // use. { - SDValue Sh(nullptr,0), Y(nullptr,0); + SDValue Sh(nullptr, 0), Y(nullptr, 0); // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 
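The (mul (shl X, c1), c2) -> (mul X, c2 << c1) combine referenced above replaces a shift feeding a multiply with a single multiply by a pre-shifted constant. A quick standalone check of the identity it relies on (plain C++, illustrative values):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0xCAFEBABE, c1 = 3, c2 = 41;
      // (X << c1) * c2 == X * (c2 << c1) in modular arithmetic
      assert(((X << c1) * c2) == (X * (c2 << c1)));
      return 0;
    }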
if (N0.getOpcode() == ISD::SHL && - (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || - isa<ConstantSDNode>(N0.getOperand(1))) && + (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1))) && N0.getNode()->hasOneUse()) { Sh = N0; Y = N1; } else if (N1.getOpcode() == ISD::SHL && @@ -2117,17 +2107,15 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } if (Sh.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, - Sh.getOperand(0), Y); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, - Mul, Sh.getOperand(1)); + SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y); + return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1)); } } // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) - if (isConstantIntBuildVectorOrConstantInt(N1) && + if (DAG.isConstantIntBuildVectorOrConstantInt(N1) && N0.getOpcode() == ISD::ADD && - isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && + DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && isMulAddWithConstProfitable(N, N0, N1)) return DAG.getNode(ISD::ADD, SDLoc(N), VT, DAG.getNode(ISD::MUL, SDLoc(N0), VT, @@ -2146,7 +2134,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI) { RTLIB::Libcall LC; - switch (Node->getSimpleValueType(0).SimpleTy) { + EVT NodeType = Node->getValueType(0); + if (!NodeType.isSimple()) + return false; + switch (NodeType.getSimpleVT().SimpleTy) { default: return false; // No libcall for vector types. case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; @@ -2163,14 +2154,18 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) { if (Node->use_empty()) return SDValue(); // This is a dead node, leave it alone. + unsigned Opcode = Node->getOpcode(); + bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM); + unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; + + // DivMod lib calls can still work on non-legal types if using lib-calls. EVT VT = Node->getValueType(0); - if (!TLI.isTypeLegal(VT)) + if (VT.isVector() || !VT.isInteger()) return SDValue(); - unsigned Opcode = Node->getOpcode(); - bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM); + if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT)) + return SDValue(); - unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; // If DIVREM is going to get expanded into a libcall, // but there is no libcall available, then don't combine. 
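useDivRem merges a div/rem pair over the same operands into one ISD::SDIVREM or ISD::UDIVREM node so that a single instruction or libcall produces both results. std::div is the closest standalone analogue (a sketch of the idea, not the combiner itself):

    #include <cassert>
    #include <cstdlib>

    int main() {
      int n = -37, d = 5;
      // One combined operation yields quotient and remainder together.
      std::div_t qr = std::div(n, d);
      assert(qr.quot == n / d && qr.rem == n % d);
      return 0;
    }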
if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) && @@ -2314,10 +2309,10 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return DivRem; // undef / X -> 0 - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return DAG.getConstant(0, DL, VT); // X / undef -> undef - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) return N1; return SDValue(); @@ -2378,10 +2373,10 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { return DivRem; // undef / X -> 0 - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return DAG.getConstant(0, DL, VT); // X / undef -> undef - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) return N1; return SDValue(); @@ -2419,15 +2414,13 @@ SDValue DAGCombiner::visitREM(SDNode *N) { } // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) if (N1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { - if (SHC->getAPIntValue().isPowerOf2()) { - SDValue Add = - DAG.getNode(ISD::ADD, DL, VT, N1, - DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, - VT)); - AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::AND, DL, VT, N0, Add); - } + ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0)); + if (SHC && SHC->getAPIntValue().isPowerOf2()) { + APInt NegOne = APInt::getAllOnesValue(VT.getSizeInBits()); + SDValue Add = + DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT)); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); } } } @@ -2462,10 +2455,10 @@ SDValue DAGCombiner::visitREM(SDNode *N) { return DivRem.getValue(1); // undef % X -> 0 - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return DAG.getConstant(0, DL, VT); // X % undef -> undef - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) return N1; return SDValue(); @@ -2489,7 +2482,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { getShiftAmountTy(N0.getValueType()))); } // fold (mulhs x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); // If the type twice as wide is legal, transform the mulhs to a wider multiply @@ -2525,7 +2518,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { if (isOneConstant(N1)) return DAG.getConstant(0, DL, N0.getValueType()); // fold (mulhu x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, DL, VT); // If the type twice as wide is legal, transform the mulhu to a wider multiply @@ -2698,8 +2691,8 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); return SDValue(); @@ -2761,7 +2754,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { } // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) - // Only perform this optimization after type legalization and before + // Only perform this optimization up until type legalization, before // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and // we don't want to undo this promotion. 
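The (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) fold in visitREM above works because a remainder by a power of two keeps only the low bits, and adding -1 to the modulus yields exactly that mask. A standalone check (plain C++; names mirror the fold and the values are arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0xDEADBEEF, pow2 = 8, y = 4;
      uint32_t m = pow2 << y;            // 128, still a power of two
      assert((x % m) == (x & (m - 1u))); // adding -1 == subtracting 1
      return 0;
    }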
@@ -2769,7 +2762,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // on scalars. if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && - Level == AfterLegalizeTypes) { + Level <= AfterLegalizeTypes) { SDValue In0 = N0.getOperand(0); SDValue In1 = N1.getOperand(0); EVT In0Ty = In0.getValueType(); @@ -2814,7 +2807,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // Don't try to fold this node if it requires introducing a // build vector of all zeros that might be illegal at this stage. - if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { + if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) { if (!LegalTypes) ShOp = DAG.getConstant(0, SDLoc(N), VT); else @@ -2829,13 +2822,13 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { N0->getOperand(0), N1->getOperand(0)); AddToWorklist(NewNode.getNode()); return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, - &SVN0->getMask()[0]); + SVN0->getMask()); } // Don't try to fold this node if it requires introducing a // build vector of all zeros that might be illegal at this stage. ShOp = N0->getOperand(0); - if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { + if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) { if (!LegalTypes) ShOp = DAG.getConstant(0, SDLoc(N), VT); else @@ -2850,7 +2843,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { N0->getOperand(1), N1->getOperand(1)); AddToWorklist(NewNode.getNode()); return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, - &SVN0->getMask()[0]); + SVN0->getMask()); } } } @@ -2867,7 +2860,7 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, EVT VT = N1.getValueType(); // fold (and x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, SDLoc(LocReference), VT); // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) SDValue LL, LR, RL, RR, CC0, CC1; @@ -2965,6 +2958,50 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, } } + // Reduce bit extract of low half of an integer to the narrower type. + // (and (srl i64:x, K), KMask) -> + // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask) + if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { + if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) { + if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + unsigned Size = VT.getSizeInBits(); + const APInt &AndMask = CAnd->getAPIntValue(); + unsigned ShiftBits = CShift->getZExtValue(); + unsigned MaskBits = AndMask.countTrailingOnes(); + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2); + + if (APIntOps::isMask(AndMask) && + // Required bits must not span the two halves of the integer and + // must fit in the half size type. + (ShiftBits + MaskBits <= Size / 2) && + TLI.isNarrowingProfitable(VT, HalfVT) && + TLI.isTypeDesirableForOp(ISD::AND, HalfVT) && + TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) && + TLI.isTruncateFree(VT, HalfVT) && + TLI.isZExtFree(HalfVT, VT)) { + // The isNarrowingProfitable is to avoid regressions on PPC and + // AArch64 which match a few 64-bit bit insert / bit extract patterns + // on downstream users of this. Those patterns could probably be + // extended to handle extensions mixed in. + + SDValue SL(N0); + assert(ShiftBits != 0 && MaskBits <= Size); + + // Extracting the highest bit of the low half. 
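The narrowing combine above is legal whenever the shifted-and-masked bits lie entirely in the low half of the wide integer, so the same extract can be done in the half-width type and zero-extended afterwards. A standalone check of that condition (plain C++; K and the mask are arbitrary values with ShiftBits + MaskBits <= 32):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t x = 0x0123456789ABCDEFull;
      unsigned K = 7;            // shift amount
      uint64_t mask = 0xFFFull;  // contiguous low mask; 7 + 12 <= 32
      uint64_t wide = (x >> K) & mask;
      uint64_t narrow =          // trunc, srl, and, then zero_extend
          (uint64_t)(((uint32_t)x >> K) & (uint32_t)mask);
      assert(wide == narrow);
      return 0;
    }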
+ EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout()); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT, + N0.getOperand(0)); + + SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT); + SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT); + SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK); + SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask); + return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And); + } + } + } + } + return SDValue(); } @@ -3045,8 +3082,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); // fold (and x, -1) -> x if (isAllOnesConstant(N1)) @@ -3090,8 +3127,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // the 'X' node here can either be nothing or an extract_vector_elt to catch // more cases. if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - N0.getOperand(0).getOpcode() == ISD::LOAD) || - N0.getOpcode() == ISD::LOAD) { + N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() && + N0.getOperand(0).getOpcode() == ISD::LOAD && + N0.getOperand(0).getResNo() == 0) || + (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) { LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0) ); @@ -3234,12 +3273,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { AddToWorklist(NewPtr.getNode()); - SDValue Load = - DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, - LN0->getChain(), NewPtr, - LN0->getPointerInfo(), - ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), Alignment, LN0->getAAInfo()); + SDValue Load = DAG.getExtLoad( + ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr, + LN0->getPointerInfo(), ExtVT, Alignment, + LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); AddToWorklist(N); CombineTo(LN0, Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -3303,9 +3340,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { - SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), - N0.getOperand(1), false); - if (BSwap.getNode()) + if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), + N0.getOperand(1), false)) return BSwap; } @@ -3576,7 +3612,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { EVT VT = N1.getValueType(); // fold (or x, undef) -> -1 if (!LegalOperations && - (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { + (N0.isUndef() || N1.isUndef())) { EVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), SDLoc(LocReference), VT); @@ -3697,59 +3733,70 @@ SDValue DAGCombiner::visitOR(SDNode *N) { N1.getValueType().getScalarType().getSizeInBits()), SDLoc(N), N1.getValueType()); - // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) - // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) + // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask) // Do this only if the resulting shuffle is legal. if (isa<ShuffleVectorSDNode>(N0) && isa<ShuffleVectorSDNode>(N1) && // Avoid folding a node with illegal type. - TLI.isTypeLegal(VT) && - N0->getOperand(1) == N1->getOperand(1) && - ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) { - bool CanFold = true; - unsigned NumElts = VT.getVectorNumElements(); - const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); - const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); - // We construct two shuffle masks: - // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand - // and N1 as the second operand. - // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand - // and N0 as the second operand. - // We do this because OR is commutable and therefore there might be - // two ways to fold this node into a shuffle. - SmallVector<int,4> Mask1; - SmallVector<int,4> Mask2; - - for (unsigned i = 0; i != NumElts && CanFold; ++i) { - int M0 = SV0->getMaskElt(i); - int M1 = SV1->getMaskElt(i); - - // Both shuffle indexes are undef. Propagate Undef. - if (M0 < 0 && M1 < 0) { - Mask1.push_back(M0); - Mask2.push_back(M0); - continue; - } + TLI.isTypeLegal(VT)) { + bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode()); + bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode()); + bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); + bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode()); + // Ensure both shuffles have a zero input. + if ((ZeroN00 || ZeroN01) && (ZeroN10 || ZeroN11)) { + assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!"); + assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!"); + const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); + const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); + bool CanFold = true; + int NumElts = VT.getVectorNumElements(); + SmallVector<int, 4> Mask(NumElts); + + for (int i = 0; i != NumElts; ++i) { + int M0 = SV0->getMaskElt(i); + int M1 = SV1->getMaskElt(i); + + // Determine if either index is pointing to a zero vector. + bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts)); + bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts)); + + // If one element is zero and the otherside is undef, keep undef. + // This also handles the case that both are undef. + if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) { + Mask[i] = -1; + continue; + } - if (M0 < 0 || M1 < 0 || - (M0 < (int)NumElts && M1 < (int)NumElts) || - (M0 >= (int)NumElts && M1 >= (int)NumElts)) { - CanFold = false; - break; + // Make sure only one of the elements is zero. + if (M0Zero == M1Zero) { + CanFold = false; + break; + } + + assert((M0 >= 0 || M1 >= 0) && "Undef index!"); + + // We have a zero and non-zero element. If the non-zero came from + // SV0 make the index a LHS index. If it came from SV1, make it + // a RHS index. We need to mod by NumElts because we don't care + // which operand it came from in the original shuffles. + Mask[i] = M1Zero ? 
M0 % NumElts : (M1 % NumElts) + NumElts; } - Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts); - Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts); - } + if (CanFold) { + SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0); + SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0); - if (CanFold) { - // Fold this sequence only if the resulting shuffle is 'legal'. - if (TLI.isShuffleMaskLegal(Mask1, VT)) - return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), - N1->getOperand(0), &Mask1[0]); - if (TLI.isShuffleMaskLegal(Mask2, VT)) - return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0), - N0->getOperand(0), &Mask2[0]); + bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT); + if (!LegalMask) { + std::swap(NewLHS, NewRHS); + ShuffleVectorSDNode::commuteMask(Mask); + LegalMask = TLI.isShuffleMaskLegal(Mask, VT); + } + + if (LegalMask) + return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask); + } } } } @@ -3760,8 +3807,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); // fold (or x, 0) -> x if (isNullConstant(N1)) @@ -3817,9 +3864,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } /// Match "(X shl/srl V1) & V2" where V2 may not be present. -static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { +bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { if (Op.getOpcode() == ISD::AND) { - if (isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { + if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { Mask = Op.getOperand(1); Op = Op.getOperand(0); } else { @@ -3946,7 +3993,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, - unsigned NegOpcode, SDLoc DL) { + unsigned NegOpcode, const SDLoc &DL) { // fold (or (shl x, (*ext y)), // (srl x, (*ext (sub 32, y)))) -> // (rotl x, y) or (rotr x, (sub 32, y)) @@ -3967,7 +4014,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. -SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { +SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. EVT VT = LHS.getValueType(); if (!TLI.isTypeLegal(VT)) return nullptr; @@ -4093,12 +4140,12 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (xor undef, undef) -> 0. This is a common idiom (misuse). 
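MatchRotate, referenced above, recognizes the classic or-of-opposing-shifts idiom and rewrites it as a hardware rotate. A standalone rendering of the idiom (plain C++; rotl32 is a hypothetical helper, not an LLVM API; masking the amounts keeps both shifts defined when r == 0):

    #include <cassert>
    #include <cstdint>

    // (or (shl x, r), (srl x, 32 - r)) == rotl(x, r)
    static uint32_t rotl32(uint32_t x, unsigned r) {
      return (x << (r & 31)) | (x >> (-r & 31));
    }

    int main() {
      assert(rotl32(0x80000001u, 1) == 0x00000003u);
      assert(rotl32(0x12345678u, 0) == 0x12345678u);
      return 0;
    }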
- if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) + if (N0.isUndef() && N1.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); // fold (xor x, undef) -> undef - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return N0; - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) return N1; // fold (xor c1, c2) -> c1^c2 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); @@ -4106,8 +4153,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold (xor x, 0) -> x if (isNullConstant(N1)) @@ -4342,8 +4389,8 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). if (N->getOperand(1).getOpcode() == ISD::TRUNCATE && N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) { - SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode()); - if (NewOp1.getNode()) + if (SDValue NewOp1 = + distributeTruncateThroughAnd(N->getOperand(1).getNode())) return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), N->getOperand(0), NewOp1); } @@ -4398,7 +4445,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N1C && N1C->isNullValue()) return N0; // fold (shl undef, x) -> 0 - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); // if (shl x, c) is known to be zero, return 0 if (DAG.MaskedValueIsZero(SDValue(N, 0), @@ -4407,8 +4454,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { - SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); - if (NewOp1.getNode()) + if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1); } @@ -4541,7 +4587,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { APInt Val; if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && (isa<ConstantSDNode>(N0.getOperand(1)) || - isConstantSplatVector(N0.getOperand(1).getNode(), Val))) { + ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val))) { SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); @@ -4637,7 +4683,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements()); // Determine the residual right-shift amount. - signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); + int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); // If the shift is not a no-op (in which case this should be just a sign // extend already), the truncated to type is legal, sign_extend is legal @@ -4664,8 +4710,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). 
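visitSHL's (shl (add x, c1), c2) -> (add (shl x, c2), (shl c1, c2)) rewrite, shown above, is distributivity of multiplication by a power of two over addition in modular arithmetic. A minimal standalone check (plain C++, arbitrary values):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0xABCD1234, c1 = 99, c2 = 5;
      assert(((x + c1) << c2) == ((x << c2) + (c1 << c2)));
      return 0;
    }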
if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { - SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); - if (NewOp1.getNode()) + if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1); } @@ -4916,7 +4961,7 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) { EVT VT = N->getValueType(0); // fold (bswap c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0); // fold (bswap (bswap x)) -> x if (N0.getOpcode() == ISD::BSWAP) @@ -4924,12 +4969,21 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitBITREVERSE(SDNode *N) { + SDValue N0 = N->getOperand(0); + + // fold (bitreverse (bitreverse x)) -> x + if (N0.getOpcode() == ISD::BITREVERSE) + return N0.getOperand(0); + return SDValue(); +} + SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (ctlz c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); return SDValue(); } @@ -4939,7 +4993,7 @@ SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { EVT VT = N->getValueType(0); // fold (ctlz_zero_undef c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } @@ -4949,7 +5003,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { EVT VT = N->getValueType(0); // fold (cttz c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); return SDValue(); } @@ -4959,7 +5013,7 @@ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { EVT VT = N->getValueType(0); // fold (cttz_zero_undef c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } @@ -4969,15 +5023,15 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { EVT VT = N->getValueType(0); // fold (ctpop c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); return SDValue(); } /// \brief Generate Min/Max node -static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, - SDValue True, SDValue False, +static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, + SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG) { if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) @@ -5237,7 +5291,7 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { // length of the BV and see if all the non-undef nodes are the same. 
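visitBSWAP and the newly added visitBITREVERSE both exploit that the operation is an involution: applying it twice is the identity, so (bswap (bswap x)) -> x. A standalone illustration (plain C++; bswap32 is a hypothetical helper, not an LLVM API):

    #include <cassert>
    #include <cstdint>

    static uint32_t bswap32(uint32_t x) {
      return (x >> 24) | ((x >> 8) & 0xFF00u) |
             ((x << 8) & 0xFF0000u) | (x << 24);
    }

    int main() {
      uint32_t x = 0x12345678u;
      assert(bswap32(x) == 0x78563412u);
      assert(bswap32(bswap32(x)) == x); // the involution the fold relies on
      return 0;
    }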
ConstantSDNode *BottomHalf = nullptr; for (int i = 0; i < NumElems / 2; ++i) { - if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) + if (Cond->getOperand(i)->isUndef()) continue; if (BottomHalf == nullptr) @@ -5249,7 +5303,7 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { // Do the same for the second half of the BuildVector ConstantSDNode *TopHalf = nullptr; for (int i = NumElems / 2; i < NumElems; ++i) { - if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) + if (Cond->getOperand(i)->isUndef()) continue; if (TopHalf == nullptr) @@ -5666,9 +5720,8 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { return N2; // Determine if the condition we're dealing with is constant - SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), - N0, N1, CC, SDLoc(N), false); - if (SCC.getNode()) { + if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, + CC, SDLoc(N), false)) { AddToWorklist(SCC.getNode()); if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { @@ -5676,7 +5729,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { return N2; // cond always true -> true val else return N3; // cond always false -> false val - } else if (SCC->getOpcode() == ISD::UNDEF) { + } else if (SCC->isUndef()) { // When the condition is UNDEF, just return the first operand. This is // coherent the DAG creation, no setcc node is created in this case return N2; @@ -5729,7 +5782,8 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, EVT VT = N->getValueType(0); assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || - Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) + Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || + Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) && "Expected EXTEND dag node in input!"); // fold (sext c1) -> c1 @@ -5756,7 +5810,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, for (unsigned i=0; i != NumElts; ++i) { SDValue Op = N0->getOperand(i); - if (Op->getOpcode() == ISD::UNDEF) { + if (Op->isUndef()) { Elts.push_back(DAG.getUNDEF(SVT)); continue; } @@ -5771,7 +5825,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT)); } - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode(); + return DAG.getBuildVector(VT, DL, Elts).getNode(); } // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: @@ -5839,8 +5893,8 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, } void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, - SDValue Trunc, SDValue ExtLoad, SDLoc DL, - ISD::NodeType ExtType) { + SDValue Trunc, SDValue ExtLoad, + const SDLoc &DL, ISD::NodeType ExtType) { // Extend SetCC uses if necessary. 
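tryToFoldExtendOfConstant, updated above, folds an extend of a constant BUILD_VECTOR by extending each element at compile time; sext and zext differ only in how the new high bits are filled. A scalar illustration (plain C++):

    #include <cassert>
    #include <cstdint>

    int main() {
      int8_t c = -1; // bit pattern 0xFF
      assert((int32_t)c == -1);             // sext copies the sign bit
      assert((uint32_t)(uint8_t)c == 255u); // zext fills with zeros
      return 0;
    }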
for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { SDNode *SetCC = SetCCs[i]; @@ -5929,9 +5983,8 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { SDValue SplitLoad = DAG.getExtLoad( ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr, - LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, - LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(), - Align, LN0->getAAInfo()); + LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align, + LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, DAG.getConstant(Stride, DL, BasePtr.getValueType())); @@ -6150,11 +6203,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDLoc DL(N); SDValue NegOne = DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT); - SDValue SCC = - SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), - NegOne, DAG.getConstant(0, DL, VT), - cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); - if (SCC.getNode()) return SCC; + if (SDValue SCC = SimplifySelectCC( + DL, N0.getOperand(0), N0.getOperand(1), NegOne, + DAG.getConstant(0, DL, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true)) + return SCC; if (!VT.isVector()) { EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType()); @@ -6436,56 +6489,48 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } if (N0.getOpcode() == ISD::SETCC) { + // Only do this before legalize for now. if (!LegalOperations && VT.isVector() && N0.getValueType().getVectorElementType() == MVT::i1) { - EVT N0VT = N0.getOperand(0).getValueType(); - if (getSetCCResultType(N0VT) == N0.getValueType()) + EVT N00VT = N0.getOperand(0).getValueType(); + if (getSetCCResultType(N00VT) == N0.getValueType()) return SDValue(); - // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. - // Only do this before legalize for now. - EVT EltVT = VT.getVectorElementType(); + // We know that the # elements of the results is the same as the # + // elements of the compare (and the # elements of the compare result for + // that matter). Check to see that they are the same size. If so, we know + // that the element size of the sext'd result matches the element size of + // the compare operands. SDLoc DL(N); - SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(), - DAG.getConstant(1, DL, EltVT)); - if (VT.getSizeInBits() == N0VT.getSizeInBits()) - // We know that the # elements of the results is the same as the - // # elements of the compare (and the # elements of the compare result - // for that matter). Check to see that they are the same size. If so, - // we know that the element size of the sext'd result matches the - // element size of the compare operands. - return DAG.getNode(ISD::AND, DL, VT, - DAG.getSetCC(DL, VT, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()), - DAG.getNode(ISD::BUILD_VECTOR, DL, VT, - OneOps)); + SDValue VecOnes = DAG.getConstant(1, DL, VT); + if (VT.getSizeInBits() == N00VT.getSizeInBits()) { + // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. 
+ SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0), + N0.getOperand(1), N0.getOperand(2)); + return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes); + } // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then - // truncate/sign extend - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); + // truncate/sign extend. + EVT MatchingElementType = EVT::getIntegerVT( + *DAG.getContext(), N00VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = EVT::getVectorVT( + *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements()); SDValue VsetCC = - DAG.getSetCC(DL, MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getNode(ISD::AND, DL, VT, - DAG.getSExtOrTrunc(VsetCC, DL, VT), - DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps)); + DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), N0.getOperand(2)); + return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT), + VecOnes); } // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc SDLoc DL(N); - SDValue SCC = - SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), - cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); - if (SCC.getNode()) return SCC; + if (SDValue SCC = SimplifySelectCC( + DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT), + DAG.getConstant(0, DL, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true)) + return SCC; } // (zext (shl (zext x), cst)) -> (shl (zext x), cst) @@ -6660,11 +6705,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc SDLoc DL(N); - SDValue SCC = - SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), - cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); - if (SCC.getNode()) + if (SDValue SCC = SimplifySelectCC( + DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT), + DAG.getConstant(0, DL, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true)) return SCC; } @@ -6854,15 +6898,14 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue Load; if (ExtType == ISD::NON_EXTLOAD) - Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), NewAlign, LN0->getAAInfo()); + Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign, + LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); else - Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), - ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), NewAlign, LN0->getAAInfo()); + Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, + NewAlign, LN0->getMemOperand()->getFlags(), + LN0->getAAInfo()); // Replace the old load's chain with the new load's chain. 
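The zext(setcc) -> (and (vsetcc), (1, 1, ...)) rewrite above works because a vector setcc of matching size produces 0 or all-ones per lane; ANDing that mask with a splat of 1 yields the 0/1 lanes a zero-extended boolean must have. A scalar sketch of the identity (plain C++):

    #include <cassert>
    #include <cstdint>

    int main() {
      int a = 3, b = 7;
      int32_t mask = (a < b) ? -1 : 0;   // sign-extended compare result
      uint32_t zext = (a < b) ? 1u : 0u; // zero-extended compare result
      assert((uint32_t)(mask & 1) == zext);
      return 0;
    }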
WorklistRemover DeadNodes(*this); @@ -6902,7 +6945,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return DAG.getUNDEF(VT); // fold (sext_in_reg c1) -> c1 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); // If the input is already sign extended, just drop the extension. @@ -6988,9 +7031,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { - SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), - N0.getOperand(1), false); - if (BSwap.getNode()) + if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), + N0.getOperand(1), false)) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1); } @@ -7002,7 +7044,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) + return DAG.getUNDEF(VT); + + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); + + return SDValue(); +} + +SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (N0.isUndef()) return DAG.getUNDEF(VT); if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, @@ -7021,7 +7077,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (N0.getValueType() == N->getValueType(0)) return N0; // fold (truncate c1) -> c1 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); // fold (truncate (truncate x)) -> (truncate x) if (N0.getOpcode() == ISD::TRUNCATE) @@ -7030,12 +7086,11 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { + // if the source is smaller than the dest, we still need an extend. if (N0.getOperand(0).getValueType().bitsLT(VT)) - // if the source is smaller than the dest, we still need an extend - return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, - N0.getOperand(0)); + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); + // if the source is larger than the dest, than we just need the truncate. if (N0.getOperand(0).getValueType().bitsGT(VT)) - // if the source is larger than the dest, than we just need the truncate return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); // if the source and dest are the same type, we can drop both the extend // and the truncate. @@ -7071,12 +7126,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); int Index = isLE ? 
(Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); - SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), - NVT, N0.getOperand(0)); - SDLoc DL(N); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, TrTy, V, + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy, + DAG.getBitcast(NVT, N0.getOperand(0)), DAG.getConstant(Index, DL, IndexTy)); } } @@ -7094,6 +7146,25 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } + // trunc (shl x, K) -> shl (trunc x), K => K < vt.size / 2 + if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) && + TLI.isTypeDesirableForOp(ISD::SHL, VT)) { + if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) { + uint64_t Amt = CAmt->getZExtValue(); + unsigned Size = VT.getSizeInBits(); + + if (Amt < Size / 2) { + SDLoc SL(N); + EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); + return DAG.getNode(ISD::SHL, SL, VT, Trunc, + DAG.getConstant(Amt, SL, AmtVT)); + } + } + } + // Fold a series of buildvector, bitcast, and truncate if possible. // For example fold // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to @@ -7121,7 +7192,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) Opnds.push_back(BuildVect.getOperand(i)); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); + return DAG.getBuildVector(VT, SDLoc(N), Opnds); } } @@ -7131,10 +7202,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // Currently we only perform this optimization on scalars because vectors // may have different active low bits. if (!VT.isVector()) { - SDValue Shorter = - GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), - VT.getSizeInBits())); - if (Shorter.getNode()) + if (SDValue Shorter = + GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), + VT.getSizeInBits()))) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); } // fold (truncate (load x)) -> (smaller load x) @@ -7168,7 +7238,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { SDValue X = N0.getOperand(i); - if (X.getOpcode() != ISD::UNDEF) { + if (!X.isUndef()) { V = X; Idx = i; NumDefs++; @@ -7200,6 +7270,24 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } + // Fold truncate of a bitcast of a vector to an extract of the low vector + // element. + // + // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0 + if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) { + SDValue VecSrc = N0.getOperand(0); + EVT SrcVT = VecSrc.getValueType(); + if (SrcVT.isVector() && SrcVT.getScalarType() == VT && + (!LegalOperations || + TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) { + SDLoc SL(N); + + EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, + VecSrc, DAG.getConstant(0, SL, IdxVT)); + } + } + // Simplify the operands using demanded-bits information. if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) @@ -7226,23 +7314,17 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { LD1->getAddressSpace() != LD2->getAddressSpace()) return SDValue(); EVT LD1VT = LD1->getValueType(0); - - if (ISD::isNON_EXTLoad(LD2) && - LD2->hasOneUse() && - // If both are volatile this would reduce the number of volatile loads. - // If one is volatile it might be ok, but play conservative and bail out. 
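The trunc (shl x, K) -> shl (trunc x), K combine above is sound because a left shift never moves high bits down into the retained low half; note the in-tree fold restricts K to less than half the destination width, a tighter bound than correctness alone requires. A standalone check (plain C++, illustrative values):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t x = 0xFEDCBA9876543210ull;
      unsigned K = 13;
      assert((uint32_t)(x << K) == ((uint32_t)x << K));
      return 0;
    }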
- !LD1->isVolatile() && - !LD2->isVolatile() && - DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { + unsigned LD1Bytes = LD1VT.getSizeInBits() / 8; + if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && + DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) { unsigned Align = LD1->getAlignment(); unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( VT.getTypeForEVT(*DAG.getContext())); if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) - return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), - LD1->getBasePtr(), LD1->getPointerInfo(), - false, false, false, Align); + return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), + LD1->getPointerInfo(), Align); } return SDValue(); @@ -7254,6 +7336,49 @@ static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) { return DAG.getDataLayout().isBigEndian() ? 1 : 0; } +static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG, + const TargetLowering &TLI) { + // If this is not a bitcast to an FP type or if the target doesn't have + // IEEE754-compliant FP logic, we're done. + EVT VT = N->getValueType(0); + if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT)) + return SDValue(); + + // TODO: Use splat values for the constant-checking below and remove this + // restriction. + SDValue N0 = N->getOperand(0); + EVT SourceVT = N0.getValueType(); + if (SourceVT.isVector()) + return SDValue(); + + unsigned FPOpcode; + APInt SignMask; + switch (N0.getOpcode()) { + case ISD::AND: + FPOpcode = ISD::FABS; + SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits()); + break; + case ISD::XOR: + FPOpcode = ISD::FNEG; + SignMask = APInt::getSignBit(SourceVT.getSizeInBits()); + break; + // TODO: ISD::OR --> ISD::FNABS? + default: + return SDValue(); + } + + // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X + // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X + SDValue LogicOp0 = N0.getOperand(0); + ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask && + LogicOp0.getOpcode() == ISD::BITCAST && + LogicOp0->getOperand(0).getValueType() == VT) + return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0)); + + return SDValue(); +} + SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -7284,13 +7409,12 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { TLI.isOperationLegal(ISD::ConstantFP, VT)) || (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() && TLI.isOperationLegal(ISD::Constant, VT))) - return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); + return DAG.getBitcast(VT, N0); } // (conv (conv x, t1), t2) -> (conv x, t2) if (N0.getOpcode() == ISD::BITCAST) - return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, - N0.getOperand(0)); + return DAG.getBitcast(VT, N0.getOperand(0)); // fold (conv (load x)) -> (load (conv*)x) // If the resultant load doesn't need a higher alignment than the original! 
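The two folds introduced by foldBitcastedFPLogic above rest on a bit-level identity that is easy to check outside the DAG: on IEEE-754 values, clearing the sign bit of the representation is fabs and flipping it is fneg. A standalone C++ sketch of that identity (illustrative only; bits/fromBits stand in for ISD::BITCAST):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

static uint64_t bits(double D) {
  uint64_t U;
  std::memcpy(&U, &D, sizeof(U)); // value-preserving reinterpretation
  return U;
}

static double fromBits(uint64_t U) {
  double D;
  std::memcpy(&D, &U, sizeof(D));
  return D;
}

int main() {
  const uint64_t SignMask = 1ULL << 63; // scalar analogue of APInt::getSignBit
  for (double X : {-3.5, -0.0, 0.0, 42.0}) {
    assert(fromBits(bits(X) & ~SignMask) == std::fabs(X)); // AND -> FABS
    assert(fromBits(bits(X) ^ SignMask) == -X);            // XOR -> FNEG
  }
  return 0;
}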
@@ -7303,21 +7427,24 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - unsigned Align = DAG.getDataLayout().getABITypeAlignment( - VT.getTypeForEVT(*DAG.getContext())); unsigned OrigAlign = LN0->getAlignment(); - if (Align <= OrigAlign) { - SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), OrigAlign, - LN0->getAAInfo()); + bool Fast = false; + if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + LN0->getAddressSpace(), OrigAlign, &Fast) && + Fast) { + SDValue Load = + DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), OrigAlign, + LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); return Load; } } + if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI)) + return V; + // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) // @@ -7334,8 +7461,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector() && !N0.getValueType().isVector()) { - SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, - N0.getOperand(0)); + SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0)); AddToWorklist(NewConv.getNode()); SDLoc DL(N); @@ -7388,8 +7514,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (isTypeLegal(IntXVT)) { - SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0), - IntXVT, N0.getOperand(1)); + SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1)); AddToWorklist(X.getNode()); // If X has a different width than the result/lhs, sext it or truncate it. 
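The load fold above is equally easy to sanity-check at the byte level: a load of the source type followed by a bitcast reads exactly the bytes a direct load of the destination type would, which is why the combine is sound whenever the target reports the new access as both legal and fast. A minimal sketch (plain C++; the alignas declaration stands in for the TLI.allowsMemoryAccess query):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // 0x4045000000000000 is the IEEE-754 bit pattern of 42.0.
  alignas(double) uint64_t Storage = 0x4045000000000000ULL;

  // (bitcast (load x)): integer load, then a value-level bitcast.
  uint64_t IntLoad;
  std::memcpy(&IntLoad, &Storage, sizeof(IntLoad));
  double ViaBitcast;
  std::memcpy(&ViaBitcast, &IntLoad, sizeof(ViaBitcast));

  // (load (conv*)x): load the destination type directly; legal here because
  // the storage is aligned for double, the condition the Fast flag guards.
  double Direct;
  std::memcpy(&Direct, &Storage, sizeof(Direct));

  assert(ViaBitcast == Direct && Direct == 42.0);
  return 0;
}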
@@ -7412,11 +7537,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2); - SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(0)), VT, - N0.getOperand(0)); + SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); AddToWorklist(Cst.getNode()); - SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(1)), VT, - N0.getOperand(1)); + SDValue X = DAG.getBitcast(VT, N0.getOperand(1)); AddToWorklist(X.getNode()); SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X); AddToWorklist(XorResult.getNode()); @@ -7439,8 +7562,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { X, DAG.getConstant(SignBit, SDLoc(X), VT)); AddToWorklist(X.getNode()); - SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), - VT, N0.getOperand(0)); + SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT)); AddToWorklist(Cst.getNode()); @@ -7472,7 +7594,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { return SDValue(Op.getOperand(0)); if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) - return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); + return DAG.getBitcast(VT, Op); return SDValue(); }; @@ -7529,8 +7651,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // we can end up with a scalar-to-vector node here. if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, - DAG.getNode(ISD::BITCAST, SDLoc(BV), - DstEltVT, BV->getOperand(0))); + DAG.getBitcast(DstEltVT, BV->getOperand(0))); SmallVector<SDValue, 8> Ops; for (SDValue Op : BV->op_values()) { @@ -7538,11 +7659,10 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // are promoted and implicitly truncated. Make that explicit here. if (Op.getValueType() != SrcEltVT) Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op); - Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV), - DstEltVT, Op)); + Ops.push_back(DAG.getBitcast(DstEltVT, Op)); AddToWorklist(Ops.back().getNode()); } - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); + return DAG.getBuildVector(VT, SDLoc(BV), Ops); } // Otherwise, we're growing or shrinking the elements. To avoid having to @@ -7584,7 +7704,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // Shift the previously computed bits over. NewBits <<= SrcBitSize; SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j)); - if (Op.getOpcode() == ISD::UNDEF) continue; + if (Op.isUndef()) continue; EltIsUndef = false; NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue(). 
@@ -7598,7 +7718,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { } EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + return DAG.getBuildVector(VT, DL, Ops); } // Finally, this must be the case where we are shrinking elements: each input @@ -7609,7 +7729,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { SmallVector<SDValue, 8> Ops; for (const SDValue &Op : BV->op_values()) { - if (Op.getOpcode() == ISD::UNDEF) { + if (Op.isUndef()) { Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); continue; } @@ -7628,7 +7748,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); } - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + return DAG.getBuildVector(VT, DL, Ops); } /// Try to perform FMA combining on a given FADD node. @@ -7654,6 +7774,10 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (!HasFMAD && !HasFMA) return SDValue(); + const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo(); + if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel)) + return SDValue(); + // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); @@ -7837,6 +7962,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (!HasFMAD && !HasFMA) return SDValue(); + const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo(); + if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel)) + return SDValue(); + // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); @@ -8305,7 +8434,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { AddToWorklist(Fused.getNode()); return Fused; } - return SDValue(); } @@ -8662,7 +8790,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. if (N1CFP) { // Compute the reciprocal 1.0 / c2. - APFloat N1APF = N1CFP->getValueAPF(); + const APFloat &N1APF = N1CFP->getValueAPF(); APFloat Recip(N1APF.getSemantics(), 1); // 1.0 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); // Only do the transform if the reciprocal is a legal fp immediate that @@ -8681,12 +8809,12 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) { return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0), + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); AddToWorklist(RV.getNode()); @@ -8694,7 +8822,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } } else if (N1.getOpcode() == ISD::FP_ROUND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0), + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); AddToWorklist(RV.getNode()); @@ -8715,7 +8843,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (SqrtOp.getNode()) { // We found a FSQRT, so try to make this fold: // x / (y * sqrt(z)) -> x * (rsqrt(z) / y) - if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) { RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); @@ -8772,27 +8900,7 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { // For now, create a Flags object for use with all unsafe math transforms. SDNodeFlags Flags; Flags.setUnsafeAlgebra(true); - - // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) - SDValue RV = BuildRsqrtEstimate(N->getOperand(0), &Flags); - if (!RV) - return SDValue(); - - EVT VT = RV.getValueType(); - SDLoc DL(N); - RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV, &Flags); - AddToWorklist(RV.getNode()); - - // Unfortunately, RV is now NaN if the input was exactly 0. - // Select out this case and force the answer to 0. - SDValue Zero = DAG.getConstantFP(0.0, DL, VT); - EVT CCVT = getSetCCResultType(VT); - SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ); - AddToWorklist(ZeroCmp.getNode()); - AddToWorklist(RV.getNode()); - - return DAG.getNode(VT.isVector() ? 
ISD::VSELECT : ISD::SELECT, DL, VT, - ZeroCmp, Zero, RV); + return buildSqrtEstimate(N->getOperand(0), &Flags); } /// copysign(x, fp_extend(y)) -> copysign(x, y) @@ -8868,7 +8976,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp - if (isConstantIntBuildVectorOrConstantInt(N0) && + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -8922,7 +9030,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp - if (isConstantIntBuildVectorOrConstantInt(N0) && + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -8993,9 +9101,7 @@ static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) { } if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits()) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src); - if (SrcVT == VT) - return Src; - return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Src); + return DAG.getBitcast(VT, Src); } return SDValue(); } @@ -9040,6 +9146,17 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { if (N0.getOpcode() == ISD::FP_ROUND) { const bool NIsTrunc = N->getConstantOperandVal(1) == 1; const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1; + + // Skip this folding if it results in an fp_round from f80 to f16. + // + // f80 to f16 always generates an expensive (and as yet, unimplemented) + // libcall to __truncxfhf2 instead of selecting native f16 conversion + // instructions from f32 or f64. Moreover, the first (value-preserving) + // fp_round from f80 to either f32 or f64 may become a NOP in platforms like + // x86. + if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16) + return SDValue(); + // If the first fp_round isn't a value preserving truncation, it might // introduce a tie in the second fp_round, that wouldn't occur in the // single-step fp_round we want to fold to. @@ -9198,7 +9315,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int, DAG.getConstant(SignMask, DL0, IntVT)); AddToWorklist(Int.getNode()); - return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int); + return DAG.getBitcast(VT, Int); } } @@ -9303,7 +9420,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { Int = DAG.getNode(ISD::AND, DL, IntVT, Int, DAG.getConstant(SignMask, DL, IntVT)); AddToWorklist(Int.getNode()); - return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int); + return DAG.getBitcast(N->getValueType(0), Int); } } @@ -9607,6 +9724,11 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { return false; } + // Caches for hasPredecessorHelper. + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + Worklist.push_back(N); + // If the offset is a constant, there may be other adds of constants that // can be folded with this one. We should do this to avoid having to keep // a copy of the original base pointer. 
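Hoisting Visited and Worklist above the loops here is more than tidiness: every hasPredecessorHelper query in this function walks up from the same node N, so whatever one query explores can be reused by the next instead of re-walking the DAG once per use. A toy model of that sharing (plain C++; Node and the function name are illustrative, not the SDNode API):

#include <cassert>
#include <set>
#include <vector>

struct Node {
  std::vector<Node *> Operands; // edges from a user to the values it reads
};

// Is Target reachable from the seeded frontier via operand edges?
// Visited/Worklist persist across calls, so each query only expands the
// part of the graph no earlier query has already seen.
static bool reachableFromSeeds(const Node *Target,
                               std::set<const Node *> &Visited,
                               std::vector<const Node *> &Worklist) {
  if (Visited.count(Target))
    return true; // already discovered by an earlier query
  while (!Worklist.empty()) {
    const Node *M = Worklist.back();
    Worklist.pop_back();
    for (const Node *Op : M->Operands) {
      if (Visited.insert(Op).second)
        Worklist.push_back(Op);
      if (Op == Target)
        return true;
    }
  }
  return false;
}

int main() {
  Node B;
  Node A{{&B}};    // A reads B
  Node Root{{&A}}; // Root reads A
  Node C;          // unrelated node

  std::set<const Node *> Visited;
  std::vector<const Node *> Worklist{&Root};
  assert(reachableFromSeeds(&B, Visited, Worklist));  // expands Root, then A
  assert(!reachableFromSeeds(&C, Visited, Worklist)); // reuses the cache
  return 0;
}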
@@ -9621,7 +9743,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { if (Use.getUser() == Ptr.getNode() || Use != BasePtr) continue; - if (Use.getUser()->isPredecessorOf(N)) + if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist)) continue; if (Use.getUser()->getOpcode() != ISD::ADD && @@ -9651,14 +9773,10 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Now check for #3 and #4. bool RealUse = false; - // Caches for hasPredecessorHelper - SmallPtrSet<const SDNode *, 32> Visited; - SmallVector<const SDNode *, 16> Worklist; - for (SDNode *Use : Ptr.getNode()->uses()) { if (Use == N) continue; - if (N->hasPredecessorHelper(Use, Visited, Worklist)) + if (SDNode::hasPredecessorHelper(Use, Visited, Worklist)) return false; // If Ptr may be folded in addressing mode of other use, then it's @@ -9720,7 +9838,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { ConstantSDNode *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx)); int X0, X1, Y0, Y1; - APInt Offset0 = CN->getAPIntValue(); + const APInt &Offset0 = CN->getAPIntValue(); APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue(); X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1; @@ -9984,13 +10102,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getMemOperand()->getBaseAlignment()) { - SDValue NewLoad = - DAG.getExtLoad(LD->getExtensionType(), SDLoc(N), - LD->getValueType(0), - Chain, Ptr, LD->getPointerInfo(), - LD->getMemoryVT(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), Align, LD->getAAInfo()); + SDValue NewLoad = DAG.getExtLoad( + LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr, + LD->getPointerInfo(), LD->getMemoryVT(), Align, + LD->getMemOperand()->getFlags(), LD->getAAInfo()); if (NewLoad.getNode() != N) return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); } @@ -10208,7 +10323,7 @@ struct LoadedSlice { return false; // Offsets are for indexed load only, we do not handle that. - if (Origin->getOffset().getOpcode() != ISD::UNDEF) + if (!Origin->getOffset().isUndef()) return false; const TargetLowering &TLI = DAG->getTargetLoweringInfo(); @@ -10291,10 +10406,10 @@ struct LoadedSlice { EVT SliceType = getLoadedType(); // Create the load for the slice. - SDValue LastInst = DAG->getLoad( - SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr, - Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(), - Origin->isNonTemporal(), Origin->isInvariant(), getAlignment()); + SDValue LastInst = + DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr, + Origin->getPointerInfo().getWithOffset(Offset), + getAlignment(), Origin->getMemOperand()->getFlags()); // If the final type is not the same as the loaded type, this means that // we have to pad with zero. Create a zero extend for that. 
EVT FinalType = Inst->getValueType(0); @@ -10718,9 +10833,10 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal); ++OpsNarrowed; - return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr, - St->getPointerInfo().getWithOffset(StOffset), - false, false, NewAlign).getNode(); + return DAG + .getStore(St->getChain(), SDLoc(St), IVal, Ptr, + St->getPointerInfo().getWithOffset(StOffset), NewAlign) + .getNode(); } @@ -10826,19 +10942,16 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { Ptr.getValueType(), Ptr, DAG.getConstant(PtrOff, SDLoc(LD), Ptr.getValueType())); - SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0), - LD->getChain(), NewPtr, - LD->getPointerInfo().getWithOffset(PtrOff), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), NewAlign, - LD->getAAInfo()); + SDValue NewLD = + DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, + LD->getPointerInfo().getWithOffset(PtrOff), NewAlign, + LD->getMemOperand()->getFlags(), LD->getAAInfo()); SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, DAG.getConstant(NewImm, SDLoc(Value), NewVT)); - SDValue NewST = DAG.getStore(Chain, SDLoc(N), - NewVal, NewPtr, - ST->getPointerInfo().getWithOffset(PtrOff), - false, false, NewAlign); + SDValue NewST = + DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr, + ST->getPointerInfo().getWithOffset(PtrOff), NewAlign); AddToWorklist(NewPtr.getNode()); AddToWorklist(NewLD.getNode()); @@ -10887,15 +11000,13 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { if (LDAlign < ABIAlign || STAlign < ABIAlign) return SDValue(); - SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), - LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), - false, false, false, LDAlign); + SDValue NewLD = + DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), LDAlign); - SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N), - NewLD, ST->getBasePtr(), - ST->getPointerInfo(), - false, false, STAlign); + SDValue NewST = + DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(), + ST->getPointerInfo(), STAlign); AddToWorklist(NewLD.getNode()); AddToWorklist(NewST.getNode()); @@ -10940,9 +11051,23 @@ struct BaseIndexOffset { } /// Parses tree in Ptr for base, index, offset addresses. - static BaseIndexOffset match(SDValue Ptr) { + static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) { bool IsIndexSignExt = false; + // Split up a folded GlobalAddress+Offset into its component parts. + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr)) + if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) { + return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(), + SDLoc(GA), + GA->getValueType(0), + /*Offset=*/0, + /*isTargetGA=*/false, + GA->getTargetFlags()), + SDValue(), + GA->getOffset(), + IsIndexSignExt); + } + // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD // instruction, then it could be just the BASE or everything else we don't // know how to handle. Just use Ptr as BASE and give up. @@ -11063,7 +11188,7 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, // multiply (CONST * A) after we also do the same transformation // to the "t2" instruction. 
if (OtherOp->getOpcode() == ISD::ADD && - isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) && + DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) && OtherOp->getOperand(0).getNode() == MulVar) return true; } @@ -11073,11 +11198,9 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, return false; } -SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG, - SDLoc SL, - ArrayRef<MemOpLink> Stores, - SmallVectorImpl<SDValue> &Chains, - EVT Ty) const { +SDValue DAGCombiner::getMergedConstantVectorStore( + SelectionDAG &DAG, const SDLoc &SL, ArrayRef<MemOpLink> Stores, + SmallVectorImpl<SDValue> &Chains, EVT Ty) const { SmallVector<SDValue, 8> BuildVector; for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) { @@ -11086,7 +11209,7 @@ SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG, BuildVector.push_back(St->getValue()); } - return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector); + return DAG.getBuildVector(Ty, SL, BuildVector); } bool DAGCombiner::MergeStoresOfConstantsOrVecElts( @@ -11182,29 +11305,36 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), - false, false, FirstInChain->getAlignment()); - // Replace the last store with the new store - CombineTo(LatestOp, NewStore); - // Erase all other stores. - for (unsigned i = 0; i < NumStores; ++i) { - if (StoreNodes[i].MemNode == LatestOp) - continue; - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - // ReplaceAllUsesWith will replace all uses that existed when it was - // called, but graph optimizations may cause new ones to appear. For - // example, the case in pr14333 looks like - // - // St's chain -> St -> another store -> X - // - // And the only difference from St to the other store is the chain. - // When we change it's chain to be St's chain they become identical, - // get CSEed and the net result is that X is now a use of St. - // Since we know that St is redundant, just iterate. - while (!St->use_empty()) - DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); - deleteAndRecombine(St); + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); + if (UseAA) { + // Replace all merged stores with the new store. + for (unsigned i = 0; i < NumStores; ++i) + CombineTo(StoreNodes[i].MemNode, NewStore); + } else { + // Replace the last store with the new store. + CombineTo(LatestOp, NewStore); + // Erase all other stores. + for (unsigned i = 0; i < NumStores; ++i) { + if (StoreNodes[i].MemNode == LatestOp) + continue; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + // ReplaceAllUsesWith will replace all uses that existed when it was + // called, but graph optimizations may cause new ones to appear. For + // example, the case in pr14333 looks like + // + // St's chain -> St -> another store -> X + // + // And the only difference from St to the other store is the chain. + // When we change it's chain to be St's chain they become identical, + // get CSEed and the net result is that X is now a use of St. + // Since we know that St is redundant, just iterate. 
+ while (!St->use_empty()) + DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); + deleteAndRecombine(St); + } } return true; @@ -11215,14 +11345,14 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); // We must have a base and an offset. if (!BasePtr.Base.getNode()) return; // Do not handle stores to undef base pointers. - if (BasePtr.Base.getOpcode() == ISD::UNDEF) + if (BasePtr.Base.isUndef()) return; // Walk up the chain and look for nodes with offsets from the same @@ -11253,7 +11383,7 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( if (OtherST->getMemoryVT() != MemVT) continue; - BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr()); + BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr(), DAG); if (Ptr.equalBaseIndex(BasePtr)) StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++)); @@ -11269,7 +11399,7 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( break; // Find the base pointer and offset for this memory node. - BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG); // Check that the base pointer is the same as the original one. if (!Ptr.equalBaseIndex(BasePtr)) @@ -11280,9 +11410,8 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( break; // No truncation. - if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index)) - if (St->isTruncatingStore()) - break; + if (Index->isTruncatingStore()) + break; // The stored memory type must be the same. if (Index->getMemoryVT() != MemVT) @@ -11326,6 +11455,30 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( } } +// We need to check that merging these stores does not cause a loop +// in the DAG. Any store candidate may depend on another candidate +// indirectly through its operand (we already consider dependencies +// through the chain). Check in parallel by searching up from +// non-chain operands of candidates. +bool DAGCombiner::checkMergeStoreCandidatesForDependencies( + SmallVectorImpl<MemOpLink> &StoreNodes) { + SmallPtrSet<const SDNode *, 16> Visited; + SmallVector<const SDNode *, 8> Worklist; + // search ops of store candidates + for (unsigned i = 0; i < StoreNodes.size(); ++i) { + SDNode *n = StoreNodes[i].MemNode; + // Potential loops may happen only through non-chain operands + for (unsigned j = 1; j < n->getNumOperands(); ++j) + Worklist.push_back(n->getOperand(j).getNode()); + } + // search through DAG. We can stop early if we find a store node + for (unsigned i = 0; i < StoreNodes.size(); ++i) { + if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist)) + return false; + } + return true; +} + bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (OptLevel == CodeGenOpt::None) return false; @@ -11379,6 +11532,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (StoreNodes.size() < 2) return false; + // only do dependence check in AA case + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); + if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes)) + return false; + // Sort the memory operands according to their distance from the // base pointer.
As a secondary criterion: make sure stores coming later in the code come first in the list. This is important for @@ -11557,7 +11716,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (Ld->getMemoryVT() != MemVT) break; - BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr()); + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); // If this is not the first ptr that we check. if (LdBasePtr.Base.getNode()) { // The base ptr must be the same. @@ -11690,16 +11849,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // The merged loads are required to have the same incoming chain, so // using the first's chain is acceptable. - SDValue NewLoad = DAG.getLoad( - JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign); + SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), + FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), FirstLoadAlign); SDValue NewStoreChain = DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains); - SDValue NewStore = DAG.getStore( - NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), false, false, FirstStoreAlign); + SDValue NewStore = + DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), FirstStoreAlign); // Transfer chain users from old loads to the new load. for (unsigned i = 0; i < NumElem; ++i) { @@ -11708,16 +11867,22 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { SDValue(NewLoad.getNode(), 1)); } - // Replace the last store with the new store. - CombineTo(LatestOp, NewStore); - // Erase all other stores. - for (unsigned i = 0; i < NumElem ; ++i) { - // Remove all Store nodes. - if (StoreNodes[i].MemNode == LatestOp) - continue; - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); - deleteAndRecombine(St); + if (UseAA) { + // Replace all stores with the new store. + for (unsigned i = 0; i < NumElem; ++i) + CombineTo(StoreNodes[i].MemNode, NewStore); + } else { + // Replace the last store with the new store. + CombineTo(LatestOp, NewStore); + // Erase all other stores. + for (unsigned i = 0; i < NumElem; ++i) { + // Remove all Store nodes.
+ if (StoreNodes[i].MemNode == LatestOp) + continue; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); + deleteAndRecombine(St); + } } return true; @@ -11808,21 +11973,17 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { std::swap(Lo, Hi); unsigned Alignment = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); - SDValue St0 = DAG.getStore(Chain, DL, Lo, - Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, - ST->getAlignment(), AAInfo); + SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), + ST->getAlignment(), MMOFlags, AAInfo); Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, DAG.getConstant(4, DL, Ptr.getValueType())); Alignment = MinAlign(Alignment, 4U); - SDValue St1 = DAG.getStore(Chain, DL, Hi, - Ptr, ST->getPointerInfo().getWithOffset(4), - isVolatile, isNonTemporal, - Alignment, AAInfo); + SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr, + ST->getPointerInfo().getWithOffset(4), + Alignment, MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, St0, St1); } @@ -11841,21 +12002,24 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // resultant store does not need a higher alignment than the original. if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() && ST->isUnindexed()) { - unsigned OrigAlign = ST->getAlignment(); EVT SVT = Value.getOperand(0).getValueType(); - unsigned Align = DAG.getDataLayout().getABITypeAlignment( - SVT.getTypeForEVT(*DAG.getContext())); - if (Align <= OrigAlign && - ((!LegalOperations && !ST->isVolatile()) || - TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) - return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), - Ptr, ST->getPointerInfo(), ST->isVolatile(), - ST->isNonTemporal(), OrigAlign, - ST->getAAInfo()); + if (((!LegalOperations && !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) && + TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) { + unsigned OrigAlign = ST->getAlignment(); + bool Fast = false; + if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT, + ST->getAddressSpace(), OrigAlign, &Fast) && + Fast) { + return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, + ST->getPointerInfo(), OrigAlign, + ST->getMemOperand()->getFlags(), ST->getAAInfo()); + } + } } // Turn 'store undef, Ptr' -> nothing. - if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed()) + if (Value.isUndef() && ST->isUnindexed()) return Chain; // Try to infer better alignment information than the store already has. @@ -11863,10 +12027,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > ST->getAlignment()) { SDValue NewStore = - DAG.getTruncStore(Chain, SDLoc(N), Value, - Ptr, ST->getPointerInfo(), ST->getMemoryVT(), - ST->isVolatile(), ST->isNonTemporal(), Align, - ST->getAAInfo()); + DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), + ST->getMemoryVT(), Align, + ST->getMemOperand()->getFlags(), ST->getAAInfo()); if (NewStore.getNode() != N) return CombineTo(ST, NewStore, true); } @@ -11898,6 +12061,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // manipulation. Return the original node to not do anything else. return SDValue(ST, 0); } + Chain = ST->getChain(); } // Try transforming N to an indexed store. 
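In scalar terms, the f64 path of replaceStoreOfFPConstant seen above cuts the constant's bit pattern into two 32-bit words, swaps the pair on big-endian targets, and emits two i32 stores at Ptr and Ptr + 4, with the second store's alignment capped via MinAlign. A self-contained sketch of exactly that byte layout (illustrative only):

#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

int main() {
  const double FPConst = 42.0; // stand-in for the ConstantFP being stored
  uint64_t Bits;
  std::memcpy(&Bits, &FPConst, sizeof(Bits));
  uint32_t Lo = static_cast<uint32_t>(Bits);       // low word of the payload
  uint32_t Hi = static_cast<uint32_t>(Bits >> 32); // high word of the payload

  // Mirror the combiner: big-endian targets store the words swapped.
  uint16_t Probe = 1;
  if (*reinterpret_cast<unsigned char *>(&Probe) != 1)
    std::swap(Lo, Hi);

  unsigned char Mem[8];
  std::memcpy(Mem + 0, &Lo, 4); // first i32 store at Ptr
  std::memcpy(Mem + 4, &Hi, 4); // second i32 store at Ptr + 4, MinAlign'd
  double RoundTrip;
  std::memcpy(&RoundTrip, Mem, sizeof(RoundTrip));
  assert(RoundTrip == FPConst); // same bytes as the original f64 store
  return 0;
}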
@@ -12001,7 +12165,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDLoc dl(N); // If the inserted element is an UNDEF, just use the input vector. - if (InVal.getOpcode() == ISD::UNDEF) + if (InVal.isUndef()) return InVec; EVT VT = InVec.getValueType(); @@ -12045,7 +12209,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) { Ops.append(InVec.getNode()->op_begin(), InVec.getNode()->op_end()); - } else if (InVec.getOpcode() == ISD::UNDEF) { + } else if (InVec.isUndef()) { unsigned NElts = VT.getVectorNumElements(); Ops.append(NElts, DAG.getUNDEF(InVal.getValueType())); } else { @@ -12065,11 +12229,13 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { } // Return the new vector - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + return DAG.getBuildVector(VT, dl, Ops); } SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { + assert(!OriginalLoad->isVolatile()); + EVT ResultVT = EVE->getValueType(0); EVT VecEltVT = InVecVT.getVectorElementType(); unsigned Align = OriginalLoad->getAlignment(); @@ -12115,21 +12281,20 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( VecEltVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD; - Load = DAG.getExtLoad( - ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI, - VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(), - OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo()); + Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, + OriginalLoad->getChain(), NewPtr, MPI, VecEltVT, + Align, OriginalLoad->getMemOperand()->getFlags(), + OriginalLoad->getAAInfo()); Chain = Load.getValue(1); } else { - Load = DAG.getLoad( - VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, - OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(), - OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo()); + Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, + MPI, Align, OriginalLoad->getMemOperand()->getFlags(), + OriginalLoad->getAAInfo()); Chain = Load.getValue(1); if (ResultVT.bitsLT(VecEltVT)) Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); else - Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load); + Load = DAG.getBitcast(ResultVT, Load); } WorklistRemover DeadNodes(*this); SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; @@ -12183,6 +12348,24 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // converts. } + // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x) + if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() && + ConstEltNo->isNullValue() && VT.isInteger()) { + SDValue BCSrc = InVec.getOperand(0); + if (BCSrc.getValueType().isScalarInteger()) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc); + } + + // extract_vector_elt (insert_vector_elt vec, val, idx), idx -> val + // + // This only really matters if the index is non-constant since other combines + // on the constant elements already work. + if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && + EltNo == InVec.getOperand(2)) { + SDValue Elt = InVec.getOperand(1); + return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt; + } + // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
// We only perform this optimization before the op legalization phase because // we may introduce new vector instructions which are not backed by TD @@ -12256,9 +12439,12 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { ISD::isNormalLoad(InVec.getNode()) && !N->getOperand(1)->hasPredecessor(InVec.getNode())) { SDValue Index = N->getOperand(1); - if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) - return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, - OrigLoad); + if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) { + if (!OrigLoad->isVolatile()) { + return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, + OrigLoad); + } + } } // Perform only after legalization to ensure build_vector / vector_shuffle @@ -12358,7 +12544,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); // Ignore undef inputs. - if (In.getOpcode() == ISD::UNDEF) continue; + if (In.isUndef()) continue; bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; @@ -12413,9 +12599,9 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { SDValue Cast = N->getOperand(i); assert((Cast.getOpcode() == ISD::ANY_EXTEND || Cast.getOpcode() == ISD::ZERO_EXTEND || - Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode"); + Cast.isUndef()) && "Invalid cast opcode"); SDValue In; - if (Cast.getOpcode() == ISD::UNDEF) + if (Cast.isUndef()) In = DAG.getUNDEF(SourceType); else In = Cast->getOperand(0); @@ -12434,12 +12620,12 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { if (!isTypeLegal(VecVT)) return SDValue(); // Make the new BUILD_VECTOR. - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops); + SDValue BV = DAG.getBuildVector(VecVT, dl, Ops); // The new BUILD_VECTOR node has the potential to be further optimized. AddToWorklist(BV.getNode()); // Bitcast to the desired type. - return DAG.getNode(ISD::BITCAST, dl, VT, BV); + return DAG.getBitcast(VT, BV); } SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { @@ -12502,12 +12688,12 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); - if (In.getOpcode() == ISD::UNDEF) + if (In.isUndef()) Opnds.push_back(DAG.getUNDEF(SrcVT)); else Opnds.push_back(In.getOperand(0)); } - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds); + SDValue BV = DAG.getBuildVector(NVT, dl, Opnds); AddToWorklist(BV.getNode()); return DAG.getNode(Opcode, dl, VT, BV); @@ -12545,7 +12731,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { for (unsigned i = 0; i != NumInScalars; ++i) { SDValue Op = N->getOperand(i); // Ignore undef inputs. - if (Op.getOpcode() == ISD::UNDEF) continue; + if (Op.isUndef()) continue; // See if we can combine this build_vector into a blend with a zero vector. 
if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) { @@ -12681,7 +12867,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { SDValue Ops[2]; Ops[0] = VecIn1; Ops[1] = VecIn2; - return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]); + return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], Mask); } return SDValue(); @@ -12735,18 +12921,17 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { for (SDValue &Op : Ops) { if (Op.getValueType() == SVT) continue; - if (Op.getOpcode() == ISD::UNDEF) + if (Op.isUndef()) Op = ScalarUndef; else - Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op); + Op = DAG.getBitcast(SVT, Op); } } } EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT, VT.getSizeInBits() / SVT.getSizeInBits()); - return DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops)); + return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops)); } // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR @@ -12768,7 +12953,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { Op = Op.getOperand(0); // UNDEF nodes convert to UNDEF shuffle mask values. - if (Op.getOpcode() == ISD::UNDEF) { + if (Op.isUndef()) { Mask.append((unsigned)NumOpElts, -1); continue; } @@ -12788,7 +12973,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { ExtVec = ExtVec.getOperand(0); // UNDEF nodes convert to UNDEF shuffle mask values. - if (ExtVec.getOpcode() == ISD::UNDEF) { + if (ExtVec.isUndef()) { Mask.append((unsigned)NumOpElts, -1); continue; } @@ -12812,11 +12997,11 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { return SDValue(); // At most we can reference 2 inputs in the final shuffle. - if (SV0.getOpcode() == ISD::UNDEF || SV0 == ExtVec) { + if (SV0.isUndef() || SV0 == ExtVec) { SV0 = ExtVec; for (int i = 0; i != NumOpElts; ++i) Mask.push_back(i + ExtIdx); - } else if (SV1.getOpcode() == ISD::UNDEF || SV1 == ExtVec) { + } else if (SV1.isUndef() || SV1 == ExtVec) { SV1 = ExtVec; for (int i = 0; i != NumOpElts; ++i) Mask.push_back(i + ExtIdx + NumElts); @@ -12844,7 +13029,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // Optimize concat_vectors where all but the first of the vectors are undef. 
if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) { - return Op.getOpcode() == ISD::UNDEF; + return Op.isUndef(); })) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); @@ -12874,7 +13059,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { SDLoc dl = SDLoc(N); SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar); - return DAG.getNode(ISD::BITCAST, dl, VT, Res); + return DAG.getBitcast(VT, Res); } } @@ -12885,9 +13070,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { auto IsBuildVectorOrUndef = [](const SDValue &Op) { return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode(); }; - bool AllBuildVectorsOrUndefs = - std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef); - if (AllBuildVectorsOrUndefs) { + if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) { SmallVector<SDValue, 8> Opnds; EVT SVT = VT.getScalarType(); @@ -12926,7 +13109,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { assert(VT.getVectorNumElements() == Opnds.size() && "Concat vector type mismatch"); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); + return DAG.getBuildVector(VT, SDLoc(N), Opnds); } // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. @@ -12948,7 +13131,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDValue Op = N->getOperand(i); - if (Op.getOpcode() == ISD::UNDEF) + if (Op.isUndef()) continue; // Check if this is the identity extract: @@ -13033,11 +13216,11 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // otherwise => (extract_subvec V1, ExtIdx) if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() == ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits()) - return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1)); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, - DAG.getNode(ISD::BITCAST, dl, - N->getOperand(0).getValueType(), - V->getOperand(0)), N->getOperand(1)); + return DAG.getBitcast(NVT, V->getOperand(1)); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, NVT, + DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)), + N->getOperand(1)); } } @@ -13148,7 +13331,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { // Special case: shuffle(concat(A,B)) can be more efficiently represented // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high // half vector elements. 
- if (NumElemsPerConcat * 2 == NumElts && N1.getOpcode() == ISD::UNDEF && + if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() && std::all_of(SVN->getMask().begin() + NumElemsPerConcat, SVN->getMask().end(), [](int i) { return i == -1; })) { N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1), @@ -13204,7 +13387,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG"); // Canonicalize shuffle undef, undef -> undef - if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) + if (N0.isUndef() && N1.isUndef()) return DAG.getUNDEF(VT); ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); @@ -13217,29 +13400,15 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (Idx >= (int)NumElts) Idx -= NumElts; NewMask.push_back(Idx); } - return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), - &NewMask[0]); + return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask); } // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. - if (N0.getOpcode() == ISD::UNDEF) { - SmallVector<int, 8> NewMask; - for (unsigned i = 0; i != NumElts; ++i) { - int Idx = SVN->getMaskElt(i); - if (Idx >= 0) { - if (Idx >= (int)NumElts) - Idx -= NumElts; - else - Idx = -1; // remove reference to lhs - } - NewMask.push_back(Idx); - } - return DAG.getVectorShuffle(VT, SDLoc(N), N1, DAG.getUNDEF(VT), - &NewMask[0]); - } + if (N0.isUndef()) + return DAG.getCommutedVectorShuffle(*SVN); // Remove references to rhs if it is undef - if (N1.getOpcode() == ISD::UNDEF) { + if (N1.isUndef()) { bool Changed = false; SmallVector<int, 8> NewMask; for (unsigned i = 0; i != NumElts; ++i) { @@ -13251,7 +13420,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { NewMask.push_back(Idx); } if (Changed) - return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, &NewMask[0]); + return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask); } // If it is a splat, check if the argument vector is another splat or a @@ -13275,7 +13444,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SDValue Base; bool AllSame = true; for (unsigned i = 0; i != NumElts; ++i) { - if (V->getOperand(i).getOpcode() != ISD::UNDEF) { + if (!V->getOperand(i).isUndef()) { Base = V->getOperand(i); break; } @@ -13296,13 +13465,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Canonicalize any other splat as a build_vector. const SDValue &Splatted = V->getOperand(SVN->getSplatIndex()); SmallVector<SDValue, 8> Ops(NumElts, Splatted); - SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), - V->getValueType(0), Ops); + SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops); // We may have jumped through bitcasts, so the type of the // BUILD_VECTOR may not match the type of the shuffle. 
if (V->getValueType(0) != VT) - NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV); + NewBV = DAG.getBitcast(VT, NewBV); return NewBV; } } @@ -13315,12 +13483,10 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (N0.getOpcode() == ISD::CONCAT_VECTORS && Level < AfterLegalizeVectorOps && - (N1.getOpcode() == ISD::UNDEF || + (N1.isUndef() || (N1.getOpcode() == ISD::CONCAT_VECTORS && N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) { - SDValue V = partitionShuffleOfConcats(N, DAG); - - if (V.getNode()) + if (SDValue V = partitionShuffleOfConcats(N, DAG)) return V; } @@ -13357,7 +13523,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { Op = TLI.isZExtFree(Op.getValueType(), SVT) ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT) : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops); + return DAG.getBuildVector(VT, SDLoc(N), Ops); } } @@ -13365,7 +13531,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // attempt to merge the 2 shuffles and suitably bitcast the inputs/output // back to their original types. if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && - N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps && + N1.isUndef() && Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { // Peek through the bitcast only if there is one user. @@ -13426,11 +13592,10 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } if (LegalMask) { - SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0); - SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1); - return DAG.getNode( - ISD::BITCAST, SDLoc(N), VT, - DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask)); + SV0 = DAG.getBitcast(ScaleVT, SV0); + SV1 = DAG.getBitcast(ScaleVT, SV1); + return DAG.getBitcast( + VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask)); } } } @@ -13451,7 +13616,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SDValue SV0 = N1->getOperand(0); SDValue SV1 = N1->getOperand(1); bool HasSameOp0 = N0 == SV0; - bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF; + bool IsSV1Undef = SV1.isUndef(); if (HasSameOp0 || IsSV1Undef || N0 == SV1) // Commute the operands of this shuffle so that next rule // will trigger. @@ -13504,7 +13669,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } // Simple case where 'CurrentVec' is UNDEF. - if (CurrentVec.getOpcode() == ISD::UNDEF) { + if (CurrentVec.isUndef()) { Mask.push_back(-1); continue; } @@ -13559,7 +13724,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) - return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]); + return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask); } return SDValue(); @@ -13595,26 +13760,30 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); + if (N0.getValueType() != N1.getValueType()) + return SDValue(); + // If the input vector is a concatenation, and the insert replaces // one of the halves, we can optimize into a single concat_vectors. 
- if (N0.getOpcode() == ISD::CONCAT_VECTORS && - N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) { + if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 && + N2.getOpcode() == ISD::Constant) { APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue(); EVT VT = N->getValueType(0); // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) -> // (concat_vectors Z, Y) if (InsIdx == 0) - return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, - N->getOperand(1), N0.getOperand(1)); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N1, + N0.getOperand(1)); // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) -> // (concat_vectors X, Z) - if (InsIdx == VT.getVectorNumElements()/2) - return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, - N0.getOperand(0), N->getOperand(1)); + if (InsIdx == VT.getVectorNumElements() / 2) + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0.getOperand(0), + N1); } return SDValue(); @@ -13684,7 +13853,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { int EltIdx = i / Split; int SubIdx = i % Split; SDValue Elt = RHS.getOperand(EltIdx); - if (Elt.getOpcode() == ISD::UNDEF) { + if (Elt.isUndef()) { Indices.push_back(-1); continue; } @@ -13724,7 +13893,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { SDValue Zero = DAG.getConstant(0, dl, ClearVT); return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl, DAG.getBitcast(ClearVT, LHS), - Zero, &Indices[0])); + Zero, Indices)); }; // Determine maximum split level (byte level masking). @@ -13763,8 +13932,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // -> (shuffle (VBinOp (A, B)), Undef, Mask). if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) && isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() && - LHS.getOperand(1).getOpcode() == ISD::UNDEF && - RHS.getOperand(1).getOpcode() == ISD::UNDEF) { + LHS.getOperand(1).isUndef() && + RHS.getOperand(1).isUndef()) { ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS); ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS); @@ -13776,15 +13945,15 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { N->getFlags()); AddUsersToWorklist(N); return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector, - &SVN0->getMask()[0]); + SVN0->getMask()); } } return SDValue(); } -SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, - SDValue N1, SDValue N2){ +SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, + SDValue N2) { assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2, @@ -13819,33 +13988,33 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue RHS) { - // fold (select (setcc x, -0.0, *lt), NaN, (fsqrt x)) - // The select + setcc is redundant, because fsqrt returns NaN for X < -0. + // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) + // The select + setcc is redundant, because fsqrt returns NaN for X < 0. 
if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) { if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) { // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?)) SDValue Sqrt = RHS; ISD::CondCode CC; SDValue CmpLHS; - const ConstantFPSDNode *NegZero = nullptr; + const ConstantFPSDNode *Zero = nullptr; if (TheSelect->getOpcode() == ISD::SELECT_CC) { CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get(); CmpLHS = TheSelect->getOperand(0); - NegZero = isConstOrConstSplatFP(TheSelect->getOperand(1)); + Zero = isConstOrConstSplatFP(TheSelect->getOperand(1)); } else { // SELECT or VSELECT SDValue Cmp = TheSelect->getOperand(0); if (Cmp.getOpcode() == ISD::SETCC) { CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get(); CmpLHS = Cmp.getOperand(0); - NegZero = isConstOrConstSplatFP(Cmp.getOperand(1)); + Zero = isConstOrConstSplatFP(Cmp.getOperand(1)); } } - if (NegZero && NegZero->isNegative() && NegZero->isZero() && + if (Zero && Zero->isZero() && Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT || CC == ISD::SETULT || CC == ISD::SETLT)) { - // We have: (select (setcc x, -0.0, *lt), NaN, (fsqrt x)) + // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) CombineTo(TheSelect, Sqrt); return true; } @@ -13932,24 +14101,22 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // It is safe to replace the two loads if they have different alignments, // but the new load must be the minimum (most restrictive) alignment of the // inputs. - bool isInvariant = LLD->isInvariant() & RLD->isInvariant(); unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment()); + MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags(); + if (!RLD->isInvariant()) + MMOFlags &= ~MachineMemOperand::MOInvariant; if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { - Load = DAG.getLoad(TheSelect->getValueType(0), - SDLoc(TheSelect), - // FIXME: Discards pointer and AA info. - LLD->getChain(), Addr, MachinePointerInfo(), - LLD->isVolatile(), LLD->isNonTemporal(), - isInvariant, Alignment); + // FIXME: Discards pointer and AA info. + Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect), + LLD->getChain(), Addr, MachinePointerInfo(), Alignment, + MMOFlags); } else { - Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? - RLD->getExtensionType() : LLD->getExtensionType(), - SDLoc(TheSelect), - TheSelect->getValueType(0), - // FIXME: Discards pointer and AA info. - LLD->getChain(), Addr, MachinePointerInfo(), - LLD->getMemoryVT(), LLD->isVolatile(), - LLD->isNonTemporal(), isInvariant, Alignment); + // FIXME: Discards pointer and AA info. + Load = DAG.getExtLoad( + LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType() + : LLD->getExtensionType(), + SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr, + MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags); } // Users of the select now use the result of the load. @@ -13967,9 +14134,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, /// Simplify an expression of the form (N0 cond N1) ? N2 : N3 /// where 'cond' is the comparison specified by CC. -SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, - SDValue N2, SDValue N3, - ISD::CondCode CC, bool NotExtCompare) { +SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, + SDValue N2, SDValue N3, ISD::CondCode CC, + bool NotExtCompare) { // (x ? y : y) -> y. 
 if (N2 == N3) return N2;
 
@@ -14057,7 +14224,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
       return DAG.getLoad(
           TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
-          false, false, false, Alignment);
+          Alignment);
     }
   }
 
@@ -14116,7 +14283,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
     ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
       // Shift the tested bit over the sign bit.
-      APInt AndMask = ConstAndRHS->getAPIntValue();
+      const APInt &AndMask = ConstAndRHS->getAPIntValue();
       SDValue ShlAmt =
           DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
                           getShiftAmountTy(AndLHS.getValueType()));
@@ -14210,13 +14377,48 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
     }
   }
 
+  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
+  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
+  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
+  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
+  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
+  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
+  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
+  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
+  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+    SDValue ValueOnZero = N2;
+    SDValue Count = N3;
+    // If the condition is NE instead of EQ, swap the operands.
+    if (CC == ISD::SETNE)
+      std::swap(ValueOnZero, Count);
+    // Check if the value on zero is a constant equal to the bits in the type.
+    if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
+      if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
+        // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
+        // legal, combine to just cttz.
+        if ((Count.getOpcode() == ISD::CTTZ ||
+             Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
+            N0 == Count.getOperand(0) &&
+            (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
+          return DAG.getNode(ISD::CTTZ, DL, VT, N0);
+        // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
+        // legal, combine to just ctlz.
+        if ((Count.getOpcode() == ISD::CTLZ ||
+             Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
+            N0 == Count.getOperand(0) &&
+            (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
+          return DAG.getNode(ISD::CTLZ, DL, VT, N0);
+      }
+    }
+  }
+
   return SDValue();
 }
 
 /// This is a stub for TargetLowering::SimplifySetCC.
-SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
-                                   SDValue N1, ISD::CondCode Cond,
-                                   SDLoc DL, bool foldBooleans) {
+SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
+                                   ISD::CondCode Cond, const SDLoc &DL,
+                                   bool foldBooleans) {
   TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, Level, false, this);
   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
@@ -14227,6 +14429,11 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
 /// by a magic number.
 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+  // When optimizing for minimum size, we don't want to expand a div to a mul
+  // and a shift.
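Backing up to the select_cc -> ctlz/cttz rewrites added earlier in this hunk: the fold is sound because plain ISD::CTTZ/CTLZ (unlike the _ZERO_UNDEF flavors) already produce the bit width for a zero input. A rough source-level analogue, using the GCC/Clang builtin for the zero-undef flavor (illustrative, not DAGCombiner code):

```cpp
#include <cstdint>

// ISD::CTTZ semantics the fold relies on: a defined result equal to the
// bit width when the input is zero.
unsigned cttz32(uint32_t x) { return x ? unsigned(__builtin_ctz(x)) : 32u; }

// Source shape: select_cc seteq X, 0, 32, cttz_zero_undef(X).
unsigned beforeCombine(uint32_t x) {
  return x == 0 ? 32u : unsigned(__builtin_ctz(x)); // ctz(0) is undefined
}

// After the combine, when CTTZ is legal for the type: just cttz(X).
unsigned afterCombine(uint32_t x) { return cttz32(x); }
```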
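And on the guard being added just below: the expansion it suppresses at minimum size is the classic "Hacker's Delight" multiply-by-magic-number sequence, which is faster than a hardware divide but strictly larger. A sketch for a signed 32-bit divide by 10, using the standard constants rather than anything taken from this patch:

```cpp
#include <cstdint>

// x / 10 without a divide: widening multiply by the magic constant for 10,
// take the high bits, then correct toward zero for negative x.
int32_t div10(int32_t x) {
  int64_t prod = int64_t(x) * 0x66666667;  // magic multiplier for 10
  int32_t q = int32_t(prod >> 34);         // high word, shifted right by 2
  q += uint32_t(x) >> 31;                  // +1 when x is negative
  return q;                                // == x / 10 for all int32_t x
}
```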
+  if (DAG.getMachineFunction().getFunction()->optForMinSize())
+    return SDValue();
+
   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
   if (!C)
     return SDValue();
@@ -14268,6 +14475,11 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
 /// number.
 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+  // When optimizing for minimum size, we don't want to expand a div to a mul
+  // and a shift.
+  if (DAG.getMachineFunction().getFunction()->optForMinSize())
+    return SDValue();
+
   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
   if (!C)
     return SDValue();
@@ -14334,9 +14546,9 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
 /// =>
 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
 /// As a result, we precompute A/2 prior to the iteration loop.
-SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
-                                          unsigned Iterations,
-                                          SDNodeFlags *Flags) {
+SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
+                                         unsigned Iterations,
+                                         SDNodeFlags *Flags, bool Reciprocal) {
   EVT VT = Arg.getValueType();
   SDLoc DL(Arg);
   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
@@ -14363,6 +14575,13 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
     AddToWorklist(Est.getNode());
   }
+
+  // If non-reciprocal square root is requested, multiply the result by Arg.
+  if (!Reciprocal) {
+    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
+    AddToWorklist(Est.getNode());
+  }
+
   return Est;
 }
 
@@ -14371,35 +14590,55 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
 /// =>
 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
-SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
-                                          unsigned Iterations,
-                                          SDNodeFlags *Flags) {
+SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
+                                         unsigned Iterations,
+                                         SDNodeFlags *Flags, bool Reciprocal) {
   EVT VT = Arg.getValueType();
   SDLoc DL(Arg);
   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
 
-  // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est)
-  for (unsigned i = 0; i < Iterations; ++i) {
-    SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
-    AddToWorklist(HalfEst.getNode());
-
-    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
-    AddToWorklist(Est.getNode());
+  // This routine must enter the loop below to work correctly
+  // when (Reciprocal == false).
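For reference, the two-constant iteration this hunk rewrites falls directly out of Newton's method on the F(X) named in the comment above; a short derivation (not part of the patch):

```latex
% Newton's method on F(X) = 1/X^2 - A, whose positive root is 1/sqrt(A):
\[
X_{i+1} \;=\; X_i - \frac{F(X_i)}{F'(X_i)}
        \;=\; X_i + \frac{\left(X_i^{-2} - A\right) X_i^{3}}{2}
        \;=\; \frac{3 X_i - A X_i^{3}}{2}
        \;=\; \left(-0.5\, X_i\right)\left(A X_i^{2} - 3\right)
\]
% which is exactly E = (E * -0.5) * ((A * E) * E + -3.0) in the loop below.
```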
+ assert(Iterations > 0); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); - AddToWorklist(Est.getNode()); - - Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree, Flags); - AddToWorklist(Est.getNode()); + // Newton iterations for reciprocal square root: + // E = (E * -0.5) * ((A * E) * E + -3.0) + for (unsigned i = 0; i < Iterations; ++i) { + SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); + AddToWorklist(AE.getNode()); + + SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); + AddToWorklist(AEE.getNode()); + + SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); + AddToWorklist(RHS.getNode()); + + // When calculating a square root at the last iteration build: + // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) + // (notice a common subexpression) + SDValue LHS; + if (Reciprocal || (i + 1) < Iterations) { + // RSQRT: LHS = (E * -0.5) + LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); + } else { + // SQRT: LHS = (A * E) * -0.5 + LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); + } + AddToWorklist(LHS.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst, Flags); + Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); AddToWorklist(Est.getNode()); } + return Est; } -SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { +/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case +/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if +/// Op can be zero. +SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, + bool Reciprocal) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -14410,9 +14649,9 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) { AddToWorklist(Est.getNode()); if (Iterations) { - Est = UseOneConstNR ? - BuildRsqrtNROneConst(Op, Est, Iterations, Flags) : - BuildRsqrtNRTwoConst(Op, Est, Iterations, Flags); + Est = UseOneConstNR + ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) + : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); } return Est; } @@ -14420,6 +14659,30 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { return SDValue(); } +SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { + return buildSqrtEstimateImpl(Op, Flags, true); +} + +SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) { + SDValue Est = buildSqrtEstimateImpl(Op, Flags, false); + if (!Est) + return SDValue(); + + // Unfortunately, Est is now NaN if the input was exactly 0. + // Select out this case and force the answer to 0. + EVT VT = Est.getValueType(); + SDLoc DL(Op); + SDValue Zero = DAG.getConstantFP(0.0, DL, VT); + EVT CCVT = getSetCCResultType(VT); + SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, Zero, ISD::SETEQ); + AddToWorklist(ZeroCmp.getNode()); + + Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, ZeroCmp, + Zero, Est); + AddToWorklist(Est.getNode()); + return Est; +} + /// Return true if base is a frame index, which is known not to alias with /// anything but itself. Provides base object and offset as results. 
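A scalar model of the buildSqrtEstimate hunk above, showing why the extra select is needed (illustration only; the real code emits ISD::SETCC plus SELECT/VSELECT):

```cpp
#include <cmath>

// sqrt(x) computed as x * rsqrt(x) is 0 * inf = NaN at x == 0, so the
// result has to be forced back to zero with a compare-and-select.
float sqrtViaRsqrt(float x) {
  float est = 1.0f / std::sqrt(x);  // stand-in for the hardware estimate
  float s = x * est;                // Op * rsqrt(Op)
  return x == 0.0f ? 0.0f : s;      // the ZeroCmp / select added above
}
```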
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, @@ -14514,7 +14777,7 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) && (Op0->getMemoryVT().getSizeInBits() >> 3 == Op1->getMemoryVT().getSizeInBits() >> 3) && - (Op0->getOriginalAlignment() > Op0->getMemoryVT().getSizeInBits()) >> 3) { + (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) { int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment(); int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment(); @@ -14634,63 +14897,6 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, break; } } - - // We need to be careful here to also search for aliases through the - // value operand of a store, etc. Consider the following situation: - // Token1 = ... - // L1 = load Token1, %52 - // S1 = store Token1, L1, %51 - // L2 = load Token1, %52+8 - // S2 = store Token1, L2, %51+8 - // Token2 = Token(S1, S2) - // L3 = load Token2, %53 - // S3 = store Token2, L3, %52 - // L4 = load Token2, %53+8 - // S4 = store Token2, L4, %52+8 - // If we search for aliases of S3 (which loads address %52), and we look - // only through the chain, then we'll miss the trivial dependence on L1 - // (which also loads from %52). We then might change all loads and - // stores to use Token1 as their chain operand, which could result in - // copying %53 into %52 before copying %52 into %51 (which should - // happen first). - // - // The problem is, however, that searching for such data dependencies - // can become expensive, and the cost is not directly related to the - // chain depth. Instead, we'll rule out such configurations here by - // insisting that we've visited all chain users (except for users - // of the original chain, which is not necessary). When doing this, - // we need to look through nodes we don't care about (otherwise, things - // like register copies will interfere with trivial cases). - - SmallVector<const SDNode *, 16> Worklist; - for (const SDNode *N : Visited) - if (N != OriginalChain.getNode()) - Worklist.push_back(N); - - while (!Worklist.empty()) { - const SDNode *M = Worklist.pop_back_val(); - - // We have already visited M, and want to make sure we've visited any uses - // of M that we care about. For uses that we've not visisted, and don't - // care about, queue them to the worklist. - - for (SDNode::use_iterator UI = M->use_begin(), - UIE = M->use_end(); UI != UIE; ++UI) - if (UI.getUse().getValueType() == MVT::Other && - Visited.insert(*UI).second) { - if (isa<MemSDNode>(*UI)) { - // We've not visited this use, and we care about it (it could have an - // ordering dependency with the original node). - Aliases.clear(); - Aliases.push_back(OriginalChain); - return; - } - - // We've not visited this use, but we don't care about it. Mark it as - // visited and enqueue it to the worklist. - Worklist.push_back(*UI); - } - } } /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain @@ -14713,17 +14919,17 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } -bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { +bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. 
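One hunk above deserves a callout: the isAlias change is a genuine bug fix, not a reflow. The old parenthesization shifted a bool right by three, so the condition was always false. Distilled into hypothetical helper names:

```cpp
// Old form: '>' binds first, then the bool is shifted; (0 or 1) >> 3 == 0,
// so the alignment-based disambiguation never fired.
bool oldCheck(unsigned align, unsigned sizeInBits) {
  return (align > sizeInBits) >> 3;
}

// Fixed form: compare the alignment against the access size in bytes.
bool newCheck(unsigned align, unsigned sizeInBits) {
  return align > (sizeInBits >> 3);
}
```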
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); // We must have a base and an offset. if (!BasePtr.Base.getNode()) return false; // Do not handle stores to undef base pointers. - if (BasePtr.Base.getOpcode() == ISD::UNDEF) + if (BasePtr.Base.isUndef()) return false; SmallVector<StoreSDNode *, 8> ChainedStores; @@ -14742,7 +14948,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { break; // Find the base pointer and offset for this memory node. - BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG); // Check that the base pointer is the same as the original one. if (!Ptr.equalBaseIndex(BasePtr)) @@ -14756,6 +14962,10 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { while (true) { if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { // We found a store node. Use it for the next iteration. + if (STn->isVolatile() || STn->isIndexed()) { + Index = nullptr; + break; + } ChainedStores.push_back(STn); Index = STn; break; @@ -14769,7 +14979,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { } } - bool MadeChange = false; + bool MadeChangeToSt = false; SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains; for (StoreSDNode *ChainedStore : ChainedStores) { @@ -14777,7 +14987,8 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { SDValue BetterChain = FindBetterChain(ChainedStore, Chain); if (Chain != BetterChain) { - MadeChange = true; + if (ChainedStore == St) + MadeChangeToSt = true; BetterChains.push_back(std::make_pair(ChainedStore, BetterChain)); } } @@ -14787,7 +14998,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { for (auto Replacement : BetterChains) replaceStoreChain(Replacement.first, Replacement.second); - return MadeChange; + return MadeChangeToSt; } /// This is the entry point for the file. 
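Before moving on to FastISel.cpp: the MadeChange -> MadeChangeToSt rename above changes behavior, not just naming. Neighboring stores may still be rechained as a side effect, but the function now reports success only when St itself changed. A toy model of that shape (not SelectionDAG code; names are made up):

```cpp
#include <cstddef>
#include <vector>

struct Store { int chain; };

// Improve every chained store, but only report a change for the one store
// (stIdx) the caller asked about.
bool improveChains(std::vector<Store> &chained, size_t stIdx, int better) {
  bool madeChangeToSt = false;
  for (size_t i = 0; i < chained.size(); ++i)
    if (chained[i].chain != better) {
      if (i == stIdx)
        madeChangeToSt = true; // old code: any change flipped the flag
      chained[i].chain = better;
    }
  return madeChangeToSt;
}
```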
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index cfbb20947acc7..b10da002fcfec 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -39,7 +39,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Analysis.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" @@ -56,6 +55,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -88,6 +88,8 @@ void FastISel::ArgListEntry::setAttributes(ImmutableCallSite *CS, IsByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); IsInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); IsReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); + IsSwiftSelf = CS->paramHasAttr(AttrIdx, Attribute::SwiftSelf); + IsSwiftError = CS->paramHasAttr(AttrIdx, Attribute::SwiftError); Alignment = CS->getParamAlignment(AttrIdx); } @@ -351,7 +353,8 @@ void FastISel::recomputeInsertPt() { void FastISel::removeDeadCode(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) { - assert(I && E && std::distance(I, E) > 0 && "Invalid iterator!"); + assert(static_cast<MachineInstr *>(I) && static_cast<MachineInstr *>(E) && + std::distance(I, E) > 0 && "Invalid iterator!"); while (I != E) { MachineInstr *Dead = &*I; ++I; @@ -372,7 +375,7 @@ FastISel::SavePoint FastISel::enterLocalValueArea() { void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) { if (FuncInfo.InsertPt != FuncInfo.MBB->begin()) - LastLocalValue = std::prev(FuncInfo.InsertPt); + LastLocalValue = &*std::prev(FuncInfo.InsertPt); // Restore the previous insert position. FuncInfo.InsertPt = OldInsertPt.InsertPt; @@ -492,13 +495,11 @@ bool FastISel::selectGetElementPtr(const User *I) { uint64_t TotalOffs = 0; // FIXME: What's a good SWAG number for MaxOffs? uint64_t MaxOffs = 2048; - Type *Ty = I->getOperand(0)->getType(); MVT VT = TLI.getPointerTy(DL); - for (GetElementPtrInst::const_op_iterator OI = I->op_begin() + 1, - E = I->op_end(); - OI != E; ++OI) { - const Value *Idx = *OI; - if (auto *StTy = dyn_cast<StructType>(Ty)) { + for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I); + GTI != E; ++GTI) { + const Value *Idx = GTI.getOperand(); + if (auto *StTy = dyn_cast<StructType>(*GTI)) { uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { // N = N + Offset @@ -511,9 +512,8 @@ bool FastISel::selectGetElementPtr(const User *I) { TotalOffs = 0; } } - Ty = StTy->getElementType(Field); } else { - Ty = cast<SequentialType>(Ty)->getElementType(); + Type *Ty = GTI.getIndexedType(); // If this is a constant subscript, handle it quickly. 
if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { @@ -880,9 +880,8 @@ bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol, unsigned NumArgs) { ImmutableCallSite CS(CI); - PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); - FunctionType *FTy = cast<FunctionType>(PT->getElementType()); - Type *RetTy = FTy->getReturnType(); + FunctionType *FTy = CS.getFunctionType(); + Type *RetTy = CS.getType(); ArgListTy Args; Args.reserve(NumArgs); @@ -960,6 +959,10 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { Flags.setInReg(); if (Arg.IsSRet) Flags.setSRet(); + if (Arg.IsSwiftSelf) + Flags.setSwiftSelf(); + if (Arg.IsSwiftError) + Flags.setSwiftError(); if (Arg.IsByVal) Flags.setByVal(); if (Arg.IsInAlloca) { @@ -1010,9 +1013,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { bool FastISel::lowerCall(const CallInst *CI) { ImmutableCallSite CS(CI); - PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); - FunctionType *FuncTy = cast<FunctionType>(PT->getElementType()); - Type *RetTy = FuncTy->getReturnType(); + FunctionType *FuncTy = CS.getFunctionType(); + Type *RetTy = CS.getType(); ArgListTy Args; ArgListEntry Entry; @@ -1322,6 +1324,15 @@ bool FastISel::selectBitCast(const User *I) { return true; } +// Return true if we should copy from swift error to the final vreg as specified +// by SwiftErrorWorklist. +static bool shouldCopySwiftErrorsToFinalVRegs(const TargetLowering &TLI, + FunctionLoweringInfo &FuncInfo) { + if (!TLI.supportSwiftError()) + return false; + return FuncInfo.SwiftErrorWorklist.count(FuncInfo.MBB); +} + // Remove local value instructions starting from the instruction after // SavedLastLocalValue to the current function insert point. void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue) @@ -1345,7 +1356,11 @@ bool FastISel::selectInstruction(const Instruction *I) { MachineInstr *SavedLastLocalValue = getLastLocalValue(); // Just before the terminator instruction, insert instructions to // feed PHI nodes in successor blocks. - if (isa<TerminatorInst>(I)) + if (isa<TerminatorInst>(I)) { + // If we need to materialize any vreg from worklist, we bail out of + // FastISel. + if (shouldCopySwiftErrorsToFinalVRegs(TLI, FuncInfo)) + return false; if (!handlePHINodesInSuccessorBlocks(I->getParent())) { // PHI node handling may have generated local value instructions, // even though it failed to handle all PHI nodes. @@ -1354,6 +1369,13 @@ bool FastISel::selectInstruction(const Instruction *I) { removeDeadLocalValueCode(SavedLastLocalValue); return false; } + } + + // FastISel does not handle any operand bundles except OB_funclet. + if (ImmutableCallSite CS = ImmutableCallSite(I)) + for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i) + if (CS.getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet) + return false; DbgLoc = I->getDebugLoc(); @@ -1413,7 +1435,8 @@ bool FastISel::selectInstruction(const Instruction *I) { /// Emit an unconditional branch to the given block, unless it is the immediate /// (fall-through) successor, and update the CFG. 
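On the selectGetElementPtr rework above: switching to gep_type_iterator does not change what TotalOffs accumulates for constant indices, namely struct field offsets plus scaled array indices. A worked example under an assumed typical 64-bit ABI layout (illustration, not FastISel code):

```cpp
#include <cstddef>
#include <cstdint>

struct S { int32_t a; int64_t b; };  // sizeof(S) == 16, b at offset 8

// Byte offset of &base[arrayIdx].b, the kind of sum selectGetElementPtr
// folds into TotalOffs when every index is a ConstantInt.
uint64_t gepOffset(uint64_t arrayIdx) {
  return arrayIdx * sizeof(S)  // scaled sequential-type index
         + offsetof(S, b);     // struct field offset
}
```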
-void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { +void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, + const DebugLoc &DbgLoc) { if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { // For more accurate line information if this is the only instruction @@ -2053,7 +2076,7 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); return false; } - FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); + FuncInfo.PHINodesToUpdate.push_back(std::make_pair(&*MBBI++, Reg)); DbgLoc = DebugLoc(); } } @@ -2138,7 +2161,7 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { const Value *Ptr; Type *ValTy; unsigned Alignment; - unsigned Flags; + MachineMemOperand::Flags Flags; bool IsVolatile; if (const auto *LI = dyn_cast<LoadInst>(I)) { diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 08815ed787dcd..e669ffc3d02af 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -88,6 +88,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, RegInfo = &MF->getRegInfo(); MachineModuleInfo &MMI = MF->getMMI(); const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + unsigned StackAlign = TFI->getStackAlignment(); // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; @@ -96,6 +97,31 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF, Fn->isVarArg(), Outs, Fn->getContext()); + // If this personality uses funclets, we need to do a bit more work. + DenseMap<const AllocaInst *, int *> CatchObjects; + EHPersonality Personality = classifyEHPersonality( + Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr); + if (isFuncletEHPersonality(Personality)) { + // Calculate state numbers if we haven't already. + WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo(); + if (Personality == EHPersonality::MSVC_CXX) + calculateWinCXXEHStateNumbers(&fn, EHInfo); + else if (isAsynchronousEHPersonality(Personality)) + calculateSEHStateNumbers(&fn, EHInfo); + else if (Personality == EHPersonality::CoreCLR) + calculateClrEHStateNumbers(&fn, EHInfo); + + // Map all BB references in the WinEH data to MBBs. + for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { + for (WinEHHandlerType &H : TBME.HandlerArray) { + if (const AllocaInst *AI = H.CatchObj.Alloca) + CatchObjects.insert({AI, &H.CatchObj.FrameIndex}); + else + H.CatchObj.FrameIndex = INT_MAX; + } + } + } + // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines // them. @@ -108,7 +134,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, unsigned Align = std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), AI->getAlignment()); - unsigned StackAlign = TFI->getStackAlignment(); // Static allocas can be folded into the initial stack frame // adjustment. For targets that don't realign the stack, don't @@ -120,9 +145,21 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. 
+ int FrameIndex = INT_MAX; + auto Iter = CatchObjects.find(AI); + if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) { + FrameIndex = MF->getFrameInfo()->CreateFixedObject( + TySize, 0, /*Immutable=*/false, /*isAliased=*/true); + MF->getFrameInfo()->setObjectAlignment(FrameIndex, Align); + } else { + FrameIndex = + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); + } - StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); + StaticAllocaMap[AI] = FrameIndex; + // Update the catch handler information. + if (Iter != CatchObjects.end()) + *Iter->second = FrameIndex; } else { // FIXME: Overaligned static allocas should be grouped into // a single dynamic allocation instead of using a separate @@ -281,31 +318,14 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, LPads.push_back(LPI); } - // If this personality uses funclets, we need to do a bit more work. - if (!Fn->hasPersonalityFn()) - return; - EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn()); if (!isFuncletEHPersonality(Personality)) return; - // Calculate state numbers if we haven't already. WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo(); - if (Personality == EHPersonality::MSVC_CXX) - calculateWinCXXEHStateNumbers(&fn, EHInfo); - else if (isAsynchronousEHPersonality(Personality)) - calculateSEHStateNumbers(&fn, EHInfo); - else if (Personality == EHPersonality::CoreCLR) - calculateClrEHStateNumbers(&fn, EHInfo); // Map all BB references in the WinEH data to MBBs. for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { for (WinEHHandlerType &H : TBME.HandlerArray) { - if (H.CatchObj.Alloca) { - assert(StaticAllocaMap.count(H.CatchObj.Alloca)); - H.CatchObj.FrameIndex = StaticAllocaMap[H.CatchObj.Alloca]; - } else { - H.CatchObj.FrameIndex = INT_MAX; - } if (H.Handler) H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()]; } @@ -336,7 +356,7 @@ void FunctionLoweringInfo::clear() { ByValArgFrameIndexMap.clear(); RegFixups.clear(); StatepointStackSlots.clear(); - StatepointRelocatedValues.clear(); + StatepointSpillMaps.clear(); PreferredExtendType.clear(); } @@ -575,3 +595,21 @@ void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, } } } + +unsigned FunctionLoweringInfo::findSwiftErrorVReg(const MachineBasicBlock *MBB, + const Value* Val) const { + // Find the index in SwiftErrorVals. + SwiftErrorValues::const_iterator I = + std::find(SwiftErrorVals.begin(), SwiftErrorVals.end(), Val); + assert(I != SwiftErrorVals.end() && "Can't find value in SwiftErrorVals"); + return SwiftErrorMap.lookup(MBB)[I - SwiftErrorVals.begin()]; +} + +void FunctionLoweringInfo::setSwiftErrorVReg(const MachineBasicBlock *MBB, + const Value* Val, unsigned VReg) { + // Find the index in SwiftErrorVals. 
+ SwiftErrorValues::iterator I = + std::find(SwiftErrorVals.begin(), SwiftErrorVals.end(), Val); + assert(I != SwiftErrorVals.end() && "Can't find value in SwiftErrorVals"); + SwiftErrorMap[MBB][I - SwiftErrorVals.begin()] = VReg; +} diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index a1e2d410ab00e..c8af73a3b4416 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -319,7 +320,6 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, "Chain and glue operands should occur at end of operand list!"); // Get/emit the operand. unsigned VReg = getVR(Op, VRBaseMap); - assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); const MCInstrDesc &MCID = MIB->getDesc(); bool isOptDef = IIOpNum < MCID.getNumOperands() && @@ -333,6 +333,8 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, const TargetRegisterClass *DstRC = nullptr; if (IIOpNum < II->getNumOperands()) DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF)); + assert((!DstRC || TargetRegisterInfo::isVirtualRegister(VReg)) && + "Expected VReg"); if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), @@ -440,7 +442,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, } unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, - MVT VT, DebugLoc DL) { + MVT VT, const DebugLoc &DL) { const TargetRegisterClass *VRC = MRI->getRegClass(VReg); const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx); @@ -873,7 +875,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Run post-isel target hook to adjust this instruction if needed. if (II.hasPostISelHook()) - TLI->AdjustInstrPostInstrSelection(MIB, Node); + TLI->AdjustInstrPostInstrSelection(*MIB, Node); } /// EmitSpecialNode - Generate machine code for a target-independent node and diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index 3b24d93c74fab..8a8a1bbd18f71 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -83,8 +83,8 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// ConstrainForSubReg - Try to constrain VReg to a register class that /// supports SubIdx sub-registers. Emit a copy if that isn't possible. /// Return the virtual register to use. - unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, - MVT VT, DebugLoc DL); + unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT, + const DebugLoc &DL); /// EmitSubregNode - Generate machine code for subreg nodes. /// @@ -132,7 +132,7 @@ public: /// InstrEmitter - Construct an InstrEmitter and set it to start inserting /// at the given position in the given block. 
   InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos);
-
+
 private:
   void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
                        DenseMap<SDValue, unsigned> &VRBaseMap);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f7836345f7206..81634096c1ba1 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -11,15 +11,15 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
@@ -93,25 +93,25 @@ private:
   /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
   /// is necessary to spill the vector being inserted into to memory, perform
   /// the insert there, and then read the result back.
-  SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val,
-                                         SDValue Idx, SDLoc dl);
-  SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
-                                  SDValue Idx, SDLoc dl);
+  SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
+                                         const SDLoc &dl);
+  SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx,
+                                  const SDLoc &dl);
 
   /// Return a vector shuffle operation which
   /// performs the same shuffle in terms of order or result bytes, but on a type
   /// whose vector element type is narrower than the original shuffle type.
   /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
-  SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl,
+  SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, const SDLoc &dl,
                                      SDValue N1, SDValue N2,
                                      ArrayRef<int> Mask) const;
 
   bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
-                             bool &NeedInvert, SDLoc dl);
+                             bool &NeedInvert, const SDLoc &dl);
 
   SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
   SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
-                        unsigned NumOps, bool isSigned, SDLoc dl);
+                        unsigned NumOps, bool isSigned, const SDLoc &dl);
   std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
                                                  SDNode *Node, bool isSigned);
@@ -128,26 +128,28 @@ private:
   void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
   void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 
-  SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, SDLoc dl);
+  SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
+                           const SDLoc &dl);
   SDValue ExpandBUILD_VECTOR(SDNode *Node);
   SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
   void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
                                 SmallVectorImpl<SDValue> &Results);
-  void getSignAsIntValue(FloatSignAsInt &State, SDLoc DL, SDValue Value) const;
-  SDValue modifySignAsInt(const FloatSignAsInt &State, SDLoc DL,
+  void getSignAsIntValue(FloatSignAsInt &State, const SDLoc &DL,
+                         SDValue Value) const;
+  SDValue modifySignAsInt(const FloatSignAsInt &State, const SDLoc &DL,
                           SDValue NewIntValue) const;
   SDValue ExpandFCOPYSIGN(SDNode *Node) const;
   SDValue ExpandFABS(SDNode *Node) const;
   SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
-                               SDLoc dl);
+                               const SDLoc &dl);
   SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
-                                SDLoc dl);
+                                const SDLoc &dl);
   SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
-                                SDLoc dl);
+                                const SDLoc &dl);
 
-  SDValue ExpandBITREVERSE(SDValue Op, SDLoc dl);
-  SDValue ExpandBSWAP(SDValue Op, SDLoc dl);
-  SDValue ExpandBitCount(unsigned Opc, SDValue Op, SDLoc dl);
+  SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl);
+  SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl);
+  SDValue ExpandBitCount(unsigned Opc, SDValue Op, const SDLoc &dl);
 
   SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
   SDValue ExpandInsertToVectorThroughStack(SDValue Op);
@@ -176,8 +178,6 @@ public:
            "Replacing one node with another that produces a different number "
            "of values!");
     DAG.ReplaceAllUsesWith(Old, New);
-    for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
-      DAG.TransferDbgValues(SDValue(Old, i), SDValue(New, i));
     if (UpdatedNodes)
       UpdatedNodes->insert(New);
     ReplacedNode(Old);
@@ -187,7 +187,6 @@ public:
           dbgs() << " with: "; New->dump(&DAG));
 
     DAG.ReplaceAllUsesWith(Old, New);
-    DAG.TransferDbgValues(Old, New);
     if (UpdatedNodes)
       UpdatedNodes->insert(New.getNode());
     ReplacedNode(Old.getNode());
@@ -200,7 +199,6 @@ public:
       DEBUG(dbgs() << (i == 0 ? " with: " : " and: ");
             New[i]->dump(&DAG));
-      DAG.TransferDbgValues(SDValue(Old, i), New[i]);
       if (UpdatedNodes)
         UpdatedNodes->insert(New[i].getNode());
     }
@@ -213,10 +211,9 @@ public:
 /// performs the same shuffle in terms of order or result bytes, but on a type
 /// whose vector element type is narrower than the original shuffle type.
 /// e.g.
<v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> -SDValue -SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl, - SDValue N1, SDValue N2, - ArrayRef<int> Mask) const { +SDValue SelectionDAGLegalize::ShuffleWithNarrowerEltType( + EVT NVT, EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, + ArrayRef<int> Mask) const { unsigned NumMaskElts = VT.getVectorNumElements(); unsigned NumDestElts = NVT.getVectorNumElements(); unsigned NumEltsGrowth = NumDestElts / NumMaskElts; @@ -224,7 +221,7 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl, assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!"); if (NumEltsGrowth == 1) - return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]); + return DAG.getVectorShuffle(NVT, dl, N1, N2, Mask); SmallVector<int, 8> NewMask; for (unsigned i = 0; i != NumMaskElts; ++i) { @@ -238,7 +235,7 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl, } assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?"); assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?"); - return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]); + return DAG.getVectorShuffle(NVT, dl, N1, N2, NewMask); } /// Expands the ConstantFP node to an integer constant or @@ -285,13 +282,12 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { SDValue Result = DAG.getExtLoad( ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), VT, - false, false, false, Alignment); + Alignment); return Result; } - SDValue Result = - DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), - false, false, false, Alignment); + SDValue Result = DAG.getLoad( + OrigVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment); return Result; } @@ -302,301 +298,20 @@ SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) { SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(), TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); - SDValue Result = - DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), - false, false, false, Alignment); + SDValue Result = DAG.getLoad( + VT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment); return Result; } -/// Expands an unaligned store to 2 half-size stores. -static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, - const TargetLowering &TLI, - SelectionDAGLegalize *DAGLegalize) { - assert(ST->getAddressingMode() == ISD::UNINDEXED && - "unaligned indexed stores not implemented!"); - SDValue Chain = ST->getChain(); - SDValue Ptr = ST->getBasePtr(); - SDValue Val = ST->getValue(); - EVT VT = Val.getValueType(); - int Alignment = ST->getAlignment(); - unsigned AS = ST->getAddressSpace(); - - SDLoc dl(ST); - if (ST->getMemoryVT().isFloatingPoint() || - ST->getMemoryVT().isVector()) { - EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); - if (TLI.isTypeLegal(intVT)) { - // Expand to a bitconvert of the value to the integer type of the - // same size, then a (misaligned) int store. - // FIXME: Does not handle truncating floating point stores! 
- SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); - Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), Alignment); - DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); - return; - } - // Do a (aligned) store to a stack slot, then copy from the stack slot - // to the final destination using (unaligned) integer loads and stores. - EVT StoredVT = ST->getMemoryVT(); - MVT RegVT = - TLI.getRegisterType(*DAG.getContext(), - EVT::getIntegerVT(*DAG.getContext(), - StoredVT.getSizeInBits())); - unsigned StoredBytes = StoredVT.getSizeInBits() / 8; - unsigned RegBytes = RegVT.getSizeInBits() / 8; - unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; - - // Make sure the stack slot is also aligned for the register type. - SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT); - - // Perform the original store, only redirected to the stack slot. - SDValue Store = DAG.getTruncStore(Chain, dl, - Val, StackPtr, MachinePointerInfo(), - StoredVT, false, false, 0); - SDValue Increment = DAG.getConstant( - RegBytes, dl, TLI.getPointerTy(DAG.getDataLayout(), AS)); - SmallVector<SDValue, 8> Stores; - unsigned Offset = 0; - - // Do all but one copies using the full register width. - for (unsigned i = 1; i < NumRegs; i++) { - // Load one integer register's worth from the stack slot. - SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, - MachinePointerInfo(), - false, false, false, 0); - // Store it to the final location. Remember the store. - Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, - ST->getPointerInfo().getWithOffset(Offset), - ST->isVolatile(), ST->isNonTemporal(), - MinAlign(ST->getAlignment(), Offset))); - // Increment the pointers. - Offset += RegBytes; - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - Increment); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); - } - - // The last store may be partial. Do a truncating store. On big-endian - // machines this requires an extending load from the stack slot to ensure - // that the bits are in the right place. - EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), - 8 * (StoredBytes - Offset)); - - // Load from the stack slot. - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, - MachinePointerInfo(), - MemVT, false, false, false, 0); - - Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, - ST->getPointerInfo() - .getWithOffset(Offset), - MemVT, ST->isVolatile(), - ST->isNonTemporal(), - MinAlign(ST->getAlignment(), Offset), - ST->getAAInfo())); - // The order of the stores doesn't matter - say it with a TokenFactor. - SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); - DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); - return; - } - assert(ST->getMemoryVT().isInteger() && - !ST->getMemoryVT().isVector() && - "Unaligned store of unknown type."); - // Get the half-size VT - EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext()); - int NumBits = NewStoredVT.getSizeInBits(); - int IncrementSize = NumBits / 8; - - // Divide the stored value in two parts. - SDValue ShiftAmount = - DAG.getConstant(NumBits, dl, TLI.getShiftAmountTy(Val.getValueType(), - DAG.getDataLayout())); - SDValue Lo = Val; - SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount); - - // Store the two parts - SDValue Store1, Store2; - Store1 = DAG.getTruncStore(Chain, dl, - DAG.getDataLayout().isLittleEndian() ? 
Lo : Hi, - Ptr, ST->getPointerInfo(), NewStoredVT, - ST->isVolatile(), ST->isNonTemporal(), Alignment); - - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, - TLI.getPointerTy(DAG.getDataLayout(), AS))); - Alignment = MinAlign(Alignment, IncrementSize); - Store2 = DAG.getTruncStore( - Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr, - ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, - ST->isVolatile(), ST->isNonTemporal(), Alignment, ST->getAAInfo()); - - SDValue Result = - DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); - DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); -} - -/// Expands an unaligned load to 2 half-size loads. -static void -ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, - const TargetLowering &TLI, - SDValue &ValResult, SDValue &ChainResult) { - assert(LD->getAddressingMode() == ISD::UNINDEXED && - "unaligned indexed loads not implemented!"); - SDValue Chain = LD->getChain(); - SDValue Ptr = LD->getBasePtr(); - EVT VT = LD->getValueType(0); - EVT LoadedVT = LD->getMemoryVT(); - SDLoc dl(LD); - if (VT.isFloatingPoint() || VT.isVector()) { - EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); - if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) { - // Expand to a (misaligned) integer load of the same size, - // then bitconvert to floating point or vector. - SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, - LD->getMemOperand()); - SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); - if (LoadedVT != VT) - Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND : - ISD::ANY_EXTEND, dl, VT, Result); - - ValResult = Result; - ChainResult = newLoad.getValue(1); - return; - } - - // Copy the value to a (aligned) stack slot using (unaligned) integer - // loads and stores, then do a (aligned) load from the stack slot. - MVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT); - unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; - unsigned RegBytes = RegVT.getSizeInBits() / 8; - unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; - - // Make sure the stack slot is also aligned for the register type. - SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); - - SDValue Increment = - DAG.getConstant(RegBytes, dl, TLI.getPointerTy(DAG.getDataLayout())); - SmallVector<SDValue, 8> Stores; - SDValue StackPtr = StackBase; - unsigned Offset = 0; - - // Do all but one copies using the full register width. - for (unsigned i = 1; i < NumRegs; i++) { - // Load one integer register's worth from the original location. - SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, - LD->getPointerInfo().getWithOffset(Offset), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), - MinAlign(LD->getAlignment(), Offset), - LD->getAAInfo()); - // Follow the load with a store to the stack slot. Remember the store. - Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, - MachinePointerInfo(), false, false, 0)); - // Increment the pointers. - Offset += RegBytes; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - Increment); - } - - // The last copy may be partial. Do an extending load. 
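The ExpandUnalignedStore body being deleted above survives behind TLI.expandUnalignedStore; its integer path reduces to two half-width stores. A byte-level picture for a little-endian target (a plain C++ sketch, with memcpy standing in for the i16 stores):

```cpp
#include <cstdint>
#include <cstring>

// Two-part store: Lo at Ptr, Hi = (v >> NumBits) at Ptr + IncrementSize.
void storeU32Unaligned(unsigned char *p, uint32_t v) {
  uint16_t lo = uint16_t(v);        // low half
  uint16_t hi = uint16_t(v >> 16);  // the SRL by half the width
  std::memcpy(p, &lo, sizeof lo);
  std::memcpy(p + 2, &hi, sizeof hi);
}
```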
- EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), - 8 * (LoadedBytes - Offset)); - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, - LD->getPointerInfo().getWithOffset(Offset), - MemVT, LD->isVolatile(), - LD->isNonTemporal(), - LD->isInvariant(), - MinAlign(LD->getAlignment(), Offset), - LD->getAAInfo()); - // Follow the load with a store to the stack slot. Remember the store. - // On big-endian machines this requires a truncating store to ensure - // that the bits end up in the right place. - Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, - MachinePointerInfo(), MemVT, - false, false, 0)); - - // The order of the stores doesn't matter - say it with a TokenFactor. - SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); - - // Finally, perform the original load only redirected to the stack slot. - Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, - MachinePointerInfo(), LoadedVT, false,false, false, - 0); - - // Callers expect a MERGE_VALUES node. - ValResult = Load; - ChainResult = TF; - return; - } - assert(LoadedVT.isInteger() && !LoadedVT.isVector() && - "Unaligned load of unsupported type."); - - // Compute the new VT that is half the size of the old one. This is an - // integer MVT. - unsigned NumBits = LoadedVT.getSizeInBits(); - EVT NewLoadedVT; - NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2); - NumBits >>= 1; - - unsigned Alignment = LD->getAlignment(); - unsigned IncrementSize = NumBits / 8; - ISD::LoadExtType HiExtType = LD->getExtensionType(); - - // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD. - if (HiExtType == ISD::NON_EXTLOAD) - HiExtType = ISD::ZEXTLOAD; - - // Load the value in two parts - SDValue Lo, Hi; - if (DAG.getDataLayout().isLittleEndian()) { - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), - NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), LD->isInvariant(), Alignment, - LD->getAAInfo()); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); - Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, - LD->getPointerInfo().getWithOffset(IncrementSize), - NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(),LD->isInvariant(), - MinAlign(Alignment, IncrementSize), LD->getAAInfo()); - } else { - Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), - NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), LD->isInvariant(), Alignment, - LD->getAAInfo()); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, - LD->getPointerInfo().getWithOffset(IncrementSize), - NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), LD->isInvariant(), - MinAlign(Alignment, IncrementSize), LD->getAAInfo()); - } - - // aggregate the two parts - SDValue ShiftAmount = - DAG.getConstant(NumBits, dl, TLI.getShiftAmountTy(Hi.getValueType(), - DAG.getDataLayout())); - SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount); - Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo); - - SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - ValResult = Result; - ChainResult = TF; -} - /// Some target cannot handle a variable insertion index for the /// INSERT_VECTOR_ELT instruction. 
In this case, it /// is necessary to spill the vector being inserted into to memory, perform /// the insert there, and then read the result back. -SDValue SelectionDAGLegalize:: -PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, - SDLoc dl) { +SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec, + SDValue Val, + SDValue Idx, + const SDLoc &dl) { SDValue Tmp1 = Vec; SDValue Tmp2 = Val; SDValue Tmp3 = Idx; @@ -618,8 +333,7 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, // Store the vector. SDValue Ch = DAG.getStore( DAG.getEntryNode(), dl, Tmp1, StackPtr, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false, - false, 0); + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI)); // Truncate or zero extend offset to target pointer type. Tmp3 = DAG.getZExtOrTrunc(Tmp3, dl, PtrVT); @@ -629,17 +343,15 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, DAG.getConstant(EltSize, dl, IdxVT)); SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); // Store the scalar value. - Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT, - false, false, 0); + Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT); // Load the updated vector. return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack( - DAG.getMachineFunction(), SPFI), - false, false, false, 0); + DAG.getMachineFunction(), SPFI)); } - -SDValue SelectionDAGLegalize:: -ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, SDLoc dl) { +SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, + SDValue Idx, + const SDLoc &dl) { if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) { // SCALAR_TO_VECTOR requires that the type of the value being inserted // match the element type of the vector being created, except for @@ -658,8 +370,7 @@ ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, SDLoc dl) { for (unsigned i = 0; i != NumElts; ++i) ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts); - return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, - &ShufOps[0]); + return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, ShufOps); } } return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl); @@ -676,8 +387,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); unsigned Alignment = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDLoc dl(ST); if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { @@ -686,8 +396,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Con = DAG.getConstant(CFP->getValueAPF(). bitcastToAPInt().zextOrTrunc(32), SDLoc(CFP), MVT::i32); - return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, AAInfo); + return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), Alignment, + MMOFlags, AAInfo); } if (CFP->getValueType(0) == MVT::f64) { @@ -696,7 +406,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 
zextOrTrunc(64), SDLoc(CFP), MVT::i64); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, AAInfo); + Alignment, MMOFlags, AAInfo); } if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { @@ -709,14 +419,13 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment, AAInfo); + Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), Alignment, + MMOFlags, AAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(4, dl, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), - isVolatile, isNonTemporal, MinAlign(Alignment, 4U), - AAInfo); + MinAlign(Alignment, 4U), MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -732,8 +441,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDLoc dl(Node); unsigned Alignment = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); if (!ST->isTruncatingStore()) { @@ -754,8 +462,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { unsigned AS = ST->getAddressSpace(); unsigned Align = ST->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) - ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + SDValue Result = TLI.expandUnalignedStore(ST, DAG); + ReplaceNode(SDValue(ST, 0), Result); + } break; } case TargetLowering::Custom: { @@ -770,9 +480,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { "Can only promote stores to same size type"); Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value); SDValue Result = - DAG.getStore(Chain, dl, Value, Ptr, - ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment, AAInfo); + DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + Alignment, MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -794,8 +503,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { StVT.getStoreSizeInBits()); Value = DAG.getZeroExtendInReg(Value, dl, StVT); SDValue Result = - DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment, AAInfo); + DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, + Alignment, MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. @@ -815,9 +524,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) // Store the bottom RoundWidth bits. Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - RoundVT, - isVolatile, isNonTemporal, Alignment, - AAInfo); + RoundVT, Alignment, MMOFlags, AAInfo); // Store the remaining ExtraWidth bits. 
IncrementSize = RoundWidth / 8; @@ -828,10 +535,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, dl, TLI.getShiftAmountTy(Value.getValueType(), DL))); - Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, - ST->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), AAInfo); + Hi = DAG.getTruncStore( + Chain, dl, Hi, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); } else { // Big endian - avoid unaligned stores. // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X @@ -841,18 +548,17 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { DAG.getConstant(ExtraWidth, dl, TLI.getShiftAmountTy(Value.getValueType(), DL))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), - RoundVT, isVolatile, isNonTemporal, Alignment, - AAInfo); + RoundVT, Alignment, MMOFlags, AAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); - Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, - ST->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), AAInfo); + Lo = DAG.getTruncStore( + Chain, dl, Value, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); } // The order of the stores doesn't matter. @@ -867,8 +573,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { unsigned Align = ST->getAlignment(); // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) - ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + SDValue Result = TLI.expandUnalignedStore(ST, DAG); + ReplaceNode(SDValue(ST, 0), Result); + } break; } case TargetLowering::Custom: { @@ -886,8 +594,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { "Do not know how to expand this store!"); Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); SDValue Result = - DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, AAInfo); + DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + Alignment, MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -917,13 +625,13 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { const DataLayout &DL = DAG.getDataLayout(); // If this is an unaligned load and the target doesn't support it, // expand it. 
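The TRUNCSTORE:i24 -> TRUNCSTORE:i16 + TRUNCSTORE@+2:i8 split handled in the hunks above, as a plain C++ sketch of the little-endian case (illustration only, not legalizer code):

```cpp
#include <cstdint>
#include <cstring>

// Store the bottom RoundWidth (16) bits at p, then the remaining
// ExtraWidth (8) bits of (x >> 16) at p + 2.
void truncStoreI24(unsigned char *p, uint32_t x) {
  uint16_t lo = uint16_t(x);
  std::memcpy(p, &lo, sizeof lo);
  p[2] = uint8_t(x >> 16);
}
```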
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) - ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG); + } break; } case TargetLowering::Custom: { - SDValue Res = TLI.LowerOperation(RVal, DAG); - if (Res.getNode()) { + if (SDValue Res = TLI.LowerOperation(RVal, DAG)) { RVal = Res; RChain = Res.getValue(1); } @@ -956,9 +664,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { EVT SrcVT = LD->getMemoryVT(); unsigned SrcWidth = SrcVT.getSizeInBits(); unsigned Alignment = LD->getAlignment(); - bool isVolatile = LD->isVolatile(); - bool isNonTemporal = LD->isNonTemporal(); - bool isInvariant = LD->isInvariant(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); if (SrcWidth != SrcVT.getStoreSizeInBits() && @@ -985,10 +691,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; SDValue Result = - DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), - Chain, Ptr, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, isInvariant, Alignment, - AAInfo); + DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo(), NVT, Alignment, MMOFlags, AAInfo); Ch = Result.getValue(1); // The chain. @@ -1023,10 +727,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { if (DL.isLittleEndian()) { // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) // Load the bottom RoundWidth bits. - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), - Chain, Ptr, - LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, isInvariant, Alignment, AAInfo); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo(), RoundVT, Alignment, MMOFlags, + AAInfo); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -1035,8 +738,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, isInvariant, - MinAlign(Alignment, IncrementSize), AAInfo); + ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, + AAInfo); // Build a factor node to remember that this load is independent of // the other one. @@ -1056,19 +759,18 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 // Load the top RoundWidth bits. Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, - LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, isInvariant, Alignment, AAInfo); + LD->getPointerInfo(), RoundVT, Alignment, MMOFlags, + AAInfo); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, - dl, Node->getValueType(0), Chain, Ptr, + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, isInvariant, - MinAlign(Alignment, IncrementSize), AAInfo); + ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, + AAInfo); // Build a factor node to remember that this load is independent of // the other one. 
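// The matching sketch for the extending-load split above: two smaller loads
// recombined with a shift and OR, per the comment
//   EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
// Standalone code; uint32_t plays the role of the extended result.
#include <cstdint>

static uint32_t loadI24LE(const uint8_t *P) {
  uint32_t Lo = uint32_t(P[0]) | (uint32_t(P[1]) << 8); // ZEXTLOAD:i16
  uint32_t Hi = uint32_t(P[2]);                         // EXTLOAD@+2:i8
  return Lo | (Hi << 16);                               // shl RoundWidth, then or
}

static uint32_t loadI24BE(const uint8_t *P) {
  uint32_t Hi = (uint32_t(P[0]) << 8) | uint32_t(P[1]); // EXTLOAD:i16
  uint32_t Lo = uint32_t(P[2]);                         // ZEXTLOAD@+2:i8
  return (Hi << 8) | Lo;                                // shl ExtraWidth, then or
}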
@@ -1099,8 +801,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Chain = SDValue(Node, 1); if (isCustom) { - SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res.getNode()) { + if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) { Value = Res; Chain = Res.getValue(1); } @@ -1111,8 +812,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { unsigned AS = LD->getAddressSpace(); unsigned Align = LD->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) - ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + std::tie(Value, Chain) = TLI.expandUnalignedLoad(LD, DAG); + } } break; } @@ -1399,8 +1101,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case TargetLowering::Custom: { // FIXME: The handling for custom lowering with multiple results is // a complete mess. - SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res.getNode()) { + if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) { if (!(Res.getNode() != Node || Res.getResNo() != 0)) return; @@ -1467,7 +1168,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { // Caches for hasPredecessorHelper SmallPtrSet<const SDNode *, 32> Visited; SmallVector<const SDNode *, 16> Worklist; - + Worklist.push_back(Idx.getNode()); SDValue StackPtr, Ch; for (SDNode::use_iterator UI = Vec.getNode()->use_begin(), UE = Vec.getNode()->use_end(); UI != UE; ++UI) { @@ -1485,7 +1186,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { // If the index is dependent on the store we will introduce a cycle when // creating the load (the load uses the index, and by replacing the chain // we will make the index dependent on the load). - if (Idx.getNode()->hasPredecessorHelper(ST, Visited, Worklist)) + if (SDNode::hasPredecessorHelper(ST, Visited, Worklist)) continue; StackPtr = ST->getBasePtr(); @@ -1498,7 +1199,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { // Store the value to a temporary stack slot, then LOAD the returned part. StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo()); } // Add the offset to the index. @@ -1513,12 +1214,12 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { SDValue NewLoad; if (Op.getValueType().isVector()) - NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, - MachinePointerInfo(), false, false, false, 0); + NewLoad = + DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, MachinePointerInfo()); else - NewLoad = DAG.getExtLoad( - ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), - Vec.getValueType().getVectorElementType(), false, false, false, 0); + NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, + MachinePointerInfo(), + Vec.getValueType().getVectorElementType()); // Replace the chain going out of the store, by the one out of the load. DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1)); @@ -1549,8 +1250,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // First store the whole vector. 
- SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, - false, false, 0); + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); // Then store the inserted part. @@ -1566,12 +1266,10 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { StackPtr); // Store the subvector. - Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, - MachinePointerInfo(), false, false, 0); + Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo()); // Finally, load the updated vector. - return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo, - false, false, false, 0); + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { @@ -1593,7 +1291,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // Store (in the right endianness) the elements to memory. for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { // Ignore undef elements. - if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue; + if (Node->getOperand(i).isUndef()) continue; unsigned Offset = TypeByteSize*i; @@ -1605,13 +1303,10 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) { Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(i), Idx, - PtrInfo.getWithOffset(Offset), - EltVT, false, false, 0)); + PtrInfo.getWithOffset(Offset), EltVT)); } else - Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, - Node->getOperand(i), Idx, - PtrInfo.getWithOffset(Offset), - false, false, 0)); + Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i), + Idx, PtrInfo.getWithOffset(Offset))); } SDValue StoreChain; @@ -1621,8 +1316,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { StoreChain = DAG.getEntryNode(); // Result is a load from the stack slot. - return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, - false, false, false, 0); + return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo); } namespace { @@ -1645,7 +1339,8 @@ struct FloatSignAsInt { /// containing the sign bit if the target has no integer value capable of /// holding all bits of the floating-point value. 
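// What the sign-as-int helpers below reduce to for a type like f32, where an
// integer of the same width exists: plain bit surgery. A hedged sketch with
// host memcpy standing in for the BITCAST; the stack-store-plus-i8-load path
// in getSignAsIntValue only kicks in for types such as x86 f80.
#include <cstdint>
#include <cstring>

static float copySignSketch(float Mag, float Sgn) {
  uint32_t MagBits, SgnBits;
  std::memcpy(&MagBits, &Mag, sizeof MagBits);
  std::memcpy(&SgnBits, &Sgn, sizeof SgnBits);
  const uint32_t SignMask = 0x80000000u;  // State.SignMask for f32
  uint32_t Result = (MagBits & ~SignMask) | (SgnBits & SignMask);
  float Out;
  std::memcpy(&Out, &Result, sizeof Out);
  return Out;
}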
void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, - SDLoc DL, SDValue Value) const { + const SDLoc &DL, + SDValue Value) const { EVT FloatVT = Value.getValueType(); unsigned NumBits = FloatVT.getSizeInBits(); State.FloatVT = FloatVT; @@ -1669,7 +1364,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, MachineFunction &MF = DAG.getMachineFunction(); State.FloatPointerInfo = MachinePointerInfo::getFixedStack(MF, FI); State.Chain = DAG.getStore(DAG.getEntryNode(), DL, Value, State.FloatPtr, - State.FloatPointerInfo, false, false, 0); + State.FloatPointerInfo); SDValue IntPtr; if (DataLayout.isBigEndian()) { @@ -1687,9 +1382,8 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, } State.IntPtr = IntPtr; - State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, - IntPtr, State.IntPointerInfo, MVT::i8, - false, false, false, 0); + State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, IntPtr, + State.IntPointerInfo, MVT::i8); State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7); State.SignBit = 7; } @@ -1697,16 +1391,16 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, /// Replace the integer value produced by getSignAsIntValue() with a new value /// and cast the result back to a floating-point type. SDValue SelectionDAGLegalize::modifySignAsInt(const FloatSignAsInt &State, - SDLoc DL, SDValue NewIntValue) const { + const SDLoc &DL, + SDValue NewIntValue) const { if (!State.Chain) return DAG.getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue); // Override the part containing the sign bit in the value stored on the stack. SDValue Chain = DAG.getTruncStore(State.Chain, DL, NewIntValue, State.IntPtr, - State.IntPointerInfo, MVT::i8, false, false, - 0); + State.IntPointerInfo, MVT::i8); return DAG.getLoad(State.FloatVT, DL, Chain, State.FloatPtr, - State.FloatPointerInfo, false, false, false, 0); + State.FloatPointerInfo); } SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { @@ -1843,11 +1537,10 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, /// of a true/false result. /// /// \returns true if the SetCC has been legalized, false if it hasn't. -bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, - SDValue &LHS, SDValue &RHS, - SDValue &CC, +bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, + SDValue &RHS, SDValue &CC, bool &NeedInvert, - SDLoc dl) { + const SDLoc &dl) { MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); NeedInvert = false; @@ -1944,10 +1637,8 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, /// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does /// a load from the stack slot to DestVT, extending it if needed. /// The resultant code need not be legal. -SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, - EVT SlotVT, - EVT DestVT, - SDLoc dl) { +SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, + EVT DestVT, const SDLoc &dl) { // Create the stack frame object. 
unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment( SrcOp.getValueType().getTypeForEVT(*DAG.getContext())); @@ -1969,22 +1660,21 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, SDValue Store; if (SrcSize > SlotSize) - Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, - PtrInfo, SlotVT, false, false, SrcAlign); + Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo, + SlotVT, SrcAlign); else { assert(SrcSize == SlotSize && "Invalid store"); - Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, - PtrInfo, false, false, SrcAlign); + Store = + DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo, SrcAlign); } // Result is a load from the stack slot. if (SlotSize == DestSize) - return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, - false, false, false, DestAlign); + return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign); assert(SlotSize < DestSize && "Unknown extension!"); - return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, - PtrInfo, SlotVT, false, false, false, DestAlign); + return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT, + DestAlign); } SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { @@ -1999,11 +1689,10 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { SDValue Ch = DAG.getTruncStore( DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), - Node->getValueType(0).getVectorElementType(), false, false, 0); + Node->getValueType(0).getVectorElementType()); return DAG.getLoad( Node->getValueType(0), dl, Ch, StackPtr, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false, - false, false, 0); + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI)); } static bool @@ -2025,7 +1714,7 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, NewIntermedVals; for (unsigned i = 0; i < NumElems; ++i) { SDValue V = Node->getOperand(i); - if (V.getOpcode() == ISD::UNDEF) + if (V.isUndef()) continue; SDValue Vec; @@ -2044,7 +1733,7 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, SmallVector<int, 16> FinalIndices; FinalIndices.reserve(IntermedVals[i].second.size() + IntermedVals[i+1].second.size()); - + int k = 0; for (unsigned j = 0, f = IntermedVals[i].second.size(); j != f; ++j, ++k) { @@ -2061,7 +1750,7 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, if (Phase) Shuffle = DAG.getVectorShuffle(VT, dl, IntermedVals[i].first, IntermedVals[i+1].first, - ShuffleVec.data()); + ShuffleVec); else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) return false; NewIntermedVals.push_back( @@ -2092,7 +1781,7 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, ShuffleVec[IntermedVals[1].second[i]] = NumElems + i; if (Phase) - Res = DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data()); + Res = DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec); else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) return false; } @@ -2117,7 +1806,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { bool isConstant = true; for (unsigned i = 0; i < NumElems; ++i) { SDValue V = Node->getOperand(i); - if (V.getOpcode() == ISD::UNDEF) + if (V.isUndef()) continue; if (i > 0) isOnlyLowElement = false; @@ -2160,7 +1849,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { CI->getZExtValue())); } } else { - assert(Node->getOperand(i).getOpcode() == ISD::UNDEF); + assert(Node->getOperand(i).isUndef()); Type *OpNTy = 
EltVT.getTypeForEVT(*DAG.getContext()); CV.push_back(UndefValue::get(OpNTy)); } @@ -2171,13 +1860,13 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); return DAG.getLoad( VT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, Alignment); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + Alignment); } SmallSet<SDValue, 16> DefinedValues; for (unsigned i = 0; i < NumElems; ++i) { - if (Node->getOperand(i).getOpcode() == ISD::UNDEF) + if (Node->getOperand(i).isUndef()) continue; DefinedValues.insert(Node->getOperand(i)); } @@ -2187,7 +1876,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { SmallVector<int, 8> ShuffleVec(NumElems, -1); for (unsigned i = 0; i < NumElems; ++i) { SDValue V = Node->getOperand(i); - if (V.getOpcode() == ISD::UNDEF) + if (V.isUndef()) continue; ShuffleVec[i] = V == Value1 ? 0 : NumElems; } @@ -2201,7 +1890,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { Vec2 = DAG.getUNDEF(VT); // Return shuffle(LowValVec, undef, <0,0,0,0>) - return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data()); + return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec); } } else { SDValue Res; @@ -2243,15 +1932,18 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, SDValue InChain = DAG.getEntryNode(); // isTailCall may be true since the callee does not reference caller stack - // frame. Check if it's in the right position. + // frame. Check if it's in the right position and that the return types match. SDValue TCChain = InChain; - bool isTailCall = TLI.isInTailCallPosition(DAG, Node, TCChain); + const Function *F = DAG.getMachineFunction().getFunction(); + bool isTailCall = + TLI.isInTailCallPosition(DAG, Node, TCChain) && + (RetTy == F->getReturnType() || F->getReturnType()->isVoidTy()); if (isTailCall) InChain = TCChain; TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setTailCall(isTailCall).setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -2267,7 +1959,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, /// and returning a result of type RetVT. 
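// Sketch of the two-distinct-values ExpandBUILD_VECTOR path above: mask entry
// 0 selects lane 0 of Vec1 (the splat of Value1), entry NumElems selects lane
// 0 of Vec2, and -1 leaves the lane undef. Standalone approximation in which
// UndefSentinel is a hypothetical stand-in for an ISD::UNDEF operand.
#include <vector>

static std::vector<int> twoValueShuffleMask(const std::vector<long> &Ops,
                                            long Value1, long UndefSentinel) {
  const int NumElems = int(Ops.size());
  std::vector<int> Mask(NumElems, -1);
  for (int I = 0; I != NumElems; ++I) {
    if (Ops[I] == UndefSentinel)
      continue;                                // leave the lane undef
    Mask[I] = (Ops[I] == Value1) ? 0 : NumElems;
  }
  return Mask;
}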
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, - bool isSigned, SDLoc dl) { + bool isSigned, const SDLoc &dl) { TargetLowering::ArgListTy Args; Args.reserve(NumOps); @@ -2286,7 +1978,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -2320,7 +2012,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -2415,14 +2107,14 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SDLoc dl(Node); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(InChain) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); // Remainder is loaded back from the stack frame. - SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, - MachinePointerInfo(), false, false, false, 0); + SDValue Rem = + DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, MachinePointerInfo()); Results.push_back(CallInfo.first); Results.push_back(Rem); } @@ -2449,8 +2141,7 @@ static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, return false; // GNU sin/cos functions set errno while sincos does not. Therefore // combining sin and cos is only safe if unsafe-fpmath is enabled. - bool isGNU = Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU; - if (isGNU && !TM.Options.UnsafeFPMath) + if (TM.getTargetTriple().isGNUEnvironment() && !TM.Options.UnsafeFPMath) return false; return true; } @@ -2528,26 +2219,25 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(InChain) .setCallee(TLI.getLibcallCallingConv(LC), - Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args), 0); + Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args)); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); - Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr, - MachinePointerInfo(), false, false, false, 0)); - Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr, - MachinePointerInfo(), false, false, false, 0)); + Results.push_back( + DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr, MachinePointerInfo())); + Results.push_back( + DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr, MachinePointerInfo())); } /// This function is responsible for legalizing a /// INT_TO_FP operation of the specified operand when the target requests that /// we expand it. At this point, we know that the result and operand types are /// legal for the target. 
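// The 32-bit int-to-f64 expansion implemented below, as host arithmetic
// (assuming IEEE-754 doubles): storing the integer in the low word of a
// double whose high word is the biased exponent 0x43300000 yields exactly
// 2^52 + x, so subtracting the 2^52 bias recovers x. This sketches the
// unsigned path; the signed path first XORs the sign bit and biases by
// 2^52 + 2^31 (the 0x4330000080000000ULL constant in the code).
#include <cstdint>
#include <cstring>

static double uint32ToF64Sketch(uint32_t X) {
  uint64_t Bits = (uint64_t(0x43300000u) << 32) | X;  // hi word | lo word
  double D;
  std::memcpy(&D, &Bits, sizeof D);                   // the f64 load
  return D - 4503599627370496.0;                      // subtract the 2^52 bias
}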
-SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, - SDValue Op0, +SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, - SDLoc dl) { + const SDLoc &dl) { // TODO: Should any fast-math-flags be set for the created nodes? - + if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { // simple 32-bit [signed|unsigned] integer to float/double expansion @@ -2574,18 +2264,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, Op0Mapped = Op0; } // store the lo of the constructed double - based on integer input - SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, - Op0Mapped, Lo, MachinePointerInfo(), - false, false, 0); + SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op0Mapped, Lo, + MachinePointerInfo()); // initial hi portion of constructed double SDValue InitialHi = DAG.getConstant(0x43300000u, dl, MVT::i32); // store the hi of the constructed double - biased exponent - SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi, - MachinePointerInfo(), - false, false, 0); + SDValue Store2 = + DAG.getStore(Store1, dl, InitialHi, Hi, MachinePointerInfo()); // load the constructed double - SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, - MachinePointerInfo(), false, false, false, 0); + SDValue Load = + DAG.getLoad(MVT::f64, dl, Store2, StackSlot, MachinePointerInfo()); // FP constant to bias correct the final result SDValue Bias = DAG.getConstantFP(isSigned ? BitsToDouble(0x4330000080000000ULL) : @@ -2733,13 +2421,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, if (DestVT == MVT::f32) FudgeInReg = DAG.getLoad( MVT::f32, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, Alignment); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + Alignment); else { SDValue Load = DAG.getExtLoad( ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, - false, false, false, Alignment); + Alignment); HandleSDNode Handle(Load); LegalizeOp(Load.getNode()); FudgeInReg = Handle.getValue(); @@ -2753,10 +2441,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, /// we promote it. At this point, we know that the result and operand types are /// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP /// operation that takes a larger input. -SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, - EVT DestVT, +SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, - SDLoc dl) { + const SDLoc &dl) { // First step, figure out the appropriate *INT_TO_FP operation to use. EVT NewInTy = LegalOp.getValueType(); @@ -2795,10 +2482,9 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, /// we promote it. At this point, we know that the result and operand types are /// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT /// operation that returns a larger result. -SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, - EVT DestVT, +SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, - SDLoc dl) { + const SDLoc &dl) { // First step, figure out the appropriate FP_TO*INT operation to use. EVT NewOutTy = DestVT; @@ -2835,11 +2521,11 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, } /// Open code the operations for BITREVERSE. 
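// The ExpandBITREVERSE loop that follows, restated on host integers: each
// source bit I is shifted to the mirrored position J = Sz-1-I, masked to
// that single bit (APInt::getOneBitSet), and accumulated with OR.
#include <cstdint>

static uint32_t bitReverseSketch(uint32_t Op) {
  const unsigned Sz = 32;
  uint32_t Tmp = 0;
  for (unsigned I = 0, J = Sz - 1; I < Sz; ++I, --J) {
    uint32_t Tmp2 = (I < J) ? (Op << (J - I)) : (Op >> (I - J));
    Tmp |= Tmp2 & (uint32_t(1) << J);  // keep only bit J
  }
  return Tmp;
}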
-SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, SDLoc dl) { +SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) { EVT VT = Op.getValueType(); EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); unsigned Sz = VT.getScalarSizeInBits(); - + SDValue Tmp, Tmp2; Tmp = DAG.getConstant(0, dl, VT); for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) { @@ -2849,7 +2535,7 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, SDLoc dl) { else Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT)); - + APInt Shift(Sz, 1); Shift = Shift.shl(J); Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT)); @@ -2860,7 +2546,7 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, SDLoc dl) { } /// Open code the operations for BSWAP of the specified operation. -SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { +SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) { EVT VT = Op.getValueType(); EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; @@ -2914,7 +2600,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { /// Expand the specified bitcount instruction into operations. SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, - SDLoc dl) { + const SDLoc &dl) { switch (Opc) { default: llvm_unreachable("Cannot expand this yet!"); case ISD::CTPOP: { @@ -3111,10 +2797,38 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { cast<AtomicSDNode>(Node)->getFailureOrdering(), cast<AtomicSDNode>(Node)->getSynchScope()); - SDValue Success = DAG.getSetCC(SDLoc(Node), Node->getValueType(1), - Res, Node->getOperand(2), ISD::SETEQ); + SDValue ExtRes = Res; + SDValue LHS = Res; + SDValue RHS = Node->getOperand(1); + + EVT AtomicType = cast<AtomicSDNode>(Node)->getMemoryVT(); + EVT OuterType = Node->getValueType(0); + switch (TLI.getExtendForAtomicOps()) { + case ISD::SIGN_EXTEND: + LHS = DAG.getNode(ISD::AssertSext, dl, OuterType, Res, + DAG.getValueType(AtomicType)); + RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OuterType, + Node->getOperand(2), DAG.getValueType(AtomicType)); + ExtRes = LHS; + break; + case ISD::ZERO_EXTEND: + LHS = DAG.getNode(ISD::AssertZext, dl, OuterType, Res, + DAG.getValueType(AtomicType)); + RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2)); + ExtRes = LHS; + break; + case ISD::ANY_EXTEND: + LHS = DAG.getZeroExtendInReg(Res, dl, AtomicType); + RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2)); + break; + default: + llvm_unreachable("Invalid atomic op extension"); + } + + SDValue Success = + DAG.getSetCC(dl, Node->getValueType(1), LHS, RHS, ISD::SETEQ); - Results.push_back(Res.getValue(0)); + Results.push_back(ExtRes.getValue(0)); Results.push_back(Success); Results.push_back(Res.getValue(1)); break; @@ -3400,7 +3114,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } - + case ISD::FSIN: case ISD::FCOS: { EVT VT = Node->getValueType(0); @@ -3442,7 +3156,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue FloatVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op, DAG.getIntPtrConstant(0, dl)); Results.push_back( - DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, FloatVal)); + DAG.getNode(ISD::FP_TO_FP16, dl, Node->getValueType(0), FloatVal)); } } break; @@ -3760,10 +3474,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 
8); SDValue LD = DAG.getExtLoad( ISD::SEXTLOAD, dl, PTy, Chain, Addr, - MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT, - false, false, false, 0); + MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT); Addr = LD; - if (TM.getRelocationModel() == Reloc::PIC_) { + if (TM.isPositionIndependent()) { // For PIC, the sequence is: // BRIND(load(Jumptable + index) + RelocBase) // RelocBase can be JumpTable, GOT or some sort of global base. @@ -3786,7 +3499,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(2)); } else { // We test only the i1 bit. Skip the AND if UNDEF. - Tmp3 = (Tmp2.getOpcode() == ISD::UNDEF) ? Tmp2 : + Tmp3 = (Tmp2.isUndef()) ? Tmp2 : DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, DAG.getConstant(1, dl, Tmp2.getValueType())); Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, @@ -4008,7 +3721,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args), 0); + std::move(Args)); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); @@ -4031,7 +3744,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { case ISD::ATOMIC_LOAD_UMAX: case ISD::ATOMIC_CMP_SWAP: { MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false); @@ -4048,7 +3761,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol("abort", TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args), 0); + std::move(Args)); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); Results.push_back(CallResult.second); @@ -4269,18 +3982,20 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::CTPOP: // Zero extend the argument. Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); + if (Node->getOpcode() == ISD::CTTZ) { + // The count is the same in the promoted type except if the original + // value was zero. This can be handled by setting the bit just off + // the top of the original type. + auto TopBit = APInt::getOneBitSet(NVT.getSizeInBits(), + OVT.getSizeInBits()); + Tmp1 = DAG.getNode(ISD::OR, dl, NVT, Tmp1, + DAG.getConstant(TopBit, dl, NVT)); + } // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is // already the correct result. Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); - if (Node->getOpcode() == ISD::CTTZ) { - // FIXME: This should set a bit in the zero extended value instead. 
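// The CTTZ guard-bit trick added above, on concrete types: when promoting a
// 16-bit CTTZ to 32 bits, OR in a bit just past the original width so a zero
// input counts 16 trailing zeros instead of 32. Sketch using the GCC/Clang
// __builtin_ctz intrinsic in place of the promoted CTTZ node.
#include <cstdint>

static unsigned cttz16Via32(uint16_t X) {
  uint32_t Wide = uint32_t(X) | (uint32_t(1) << 16);  // TopBit = 1 << 16
  return unsigned(__builtin_ctz(Wide));               // never sees zero; X == 0 -> 16
}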
- Tmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), - Tmp1, DAG.getConstant(NVT.getSizeInBits(), dl, NVT), - ISD::SETEQ); - Tmp1 = DAG.getSelect(dl, NVT, Tmp2, - DAG.getConstant(OVT.getSizeInBits(), dl, NVT), Tmp1); - } else if (Node->getOpcode() == ISD::CTLZ || - Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { + if (Node->getOpcode() == ISD::CTLZ || + Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, DAG.getConstant(NVT.getSizeInBits() - diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 6c0193a76732a..31ebf7bbec13c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -149,9 +149,26 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { if (isLegalInHWReg(N->getValueType(ResNo))) return SDValue(N, ResNo); ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N); - return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN), - TLI.getTypeToTransformTo(*DAG.getContext(), - CN->getValueType(0))); + // In ppcf128, the high 64 bits are always first in memory regardless + // of Endianness. LLVM's APFloat representation is not Endian sensitive, + // and so always converts into a 128-bit APInt in a non-Endian-sensitive + // way. However, APInt's are serialized in an Endian-sensitive fashion, + // so on big-Endian targets, the two doubles are output in the wrong + // order. Fix this by manually flipping the order of the high 64 bits + // and the low 64 bits here. + if (DAG.getDataLayout().isBigEndian() && + CN->getValueType(0).getSimpleVT() == llvm::MVT::ppcf128) { + uint64_t words[2] = { CN->getValueAPF().bitcastToAPInt().getRawData()[1], + CN->getValueAPF().bitcastToAPInt().getRawData()[0] }; + APInt Val(128, words); + return DAG.getConstant(Val, SDLoc(CN), + TLI.getTypeToTransformTo(*DAG.getContext(), + CN->getValueType(0))); + } else { + return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN), + TLI.getTypeToTransformTo(*DAG.getContext(), + CN->getValueType(0))); + } } SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -614,12 +631,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDLoc dl(N); + auto MMOFlags = + L->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant; SDValue NewL; if (L->getExtensionType() == ISD::NON_EXTLOAD) { - NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), - NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), - L->getPointerInfo(), NVT, L->isVolatile(), - L->isNonTemporal(), false, L->getAlignment(), + NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, + L->getChain(), L->getBasePtr(), L->getOffset(), + L->getPointerInfo(), NVT, L->getAlignment(), MMOFlags, L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -629,12 +647,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { } // Do a non-extending load followed by FP_EXTEND. 
- NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, - L->getMemoryVT(), dl, L->getChain(), - L->getBasePtr(), L->getOffset(), L->getPointerInfo(), - L->getMemoryVT(), L->isVolatile(), - L->isNonTemporal(), false, L->getAlignment(), - L->getAAInfo()); + NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, L->getMemoryVT(), + dl, L->getChain(), L->getBasePtr(), L->getOffset(), + L->getPointerInfo(), L->getMemoryVT(), L->getAlignment(), + MMOFlags, L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -800,6 +816,7 @@ bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::FCOPYSIGN: case ISD::FNEG: case ISD::Register: + case ISD::SELECT: return true; } return false; @@ -1516,7 +1533,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - SDLoc dl) { + const SDLoc &dl) { SDValue LHSLo, LHSHi, RHSLo, RHSHi; GetExpandedFloat(NewLHS, LHSLo, LHSHi); GetExpandedFloat(NewRHS, RHSLo, RHSHi); @@ -1868,6 +1885,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { // Binary FP Operations case ISD::FADD: case ISD::FDIV: + case ISD::FMAXNAN: + case ISD::FMINNAN: case ISD::FMAXNUM: case ISD::FMINNUM: case ISD::FMUL: @@ -2063,13 +2082,14 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) { LoadSDNode *L = cast<LoadSDNode>(N); EVT VT = N->getValueType(0); - // Load the value as an integer value with the same number of bits + // Load the value as an integer value with the same number of bits. EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); - SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), - IVT, SDLoc(N), L->getChain(), L->getBasePtr(), - L->getOffset(), L->getPointerInfo(), IVT, L->isVolatile(), - L->isNonTemporal(), false, L->getAlignment(), - L->getAAInfo()); + auto MMOFlags = + L->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant; + SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), IVT, + SDLoc(N), L->getChain(), L->getBasePtr(), + L->getOffset(), L->getPointerInfo(), IVT, + L->getAlignment(), MMOFlags, L->getAAInfo()); // Legalize the chain result by replacing uses of the old value chain with the // new one ReplaceValueWith(SDValue(N, 1), newL.getValue(1)); @@ -2102,9 +2122,14 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT_CC(SDNode *N) { // Construct a SDNode that transforms the SINT or UINT operand to the promoted // float type. SDValue DAGTypeLegalizer::PromoteFloatRes_XINT_TO_FP(SDNode *N) { + SDLoc DL(N); EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, N->getOperand(0)); + SDValue NV = DAG.getNode(N->getOpcode(), DL, NVT, N->getOperand(0)); + // Round the value to the desired precision (that of the source type). 
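// A host-level sketch of the rounding fix completed just below: convert in
// the promoted type, then round to the destination precision and extend
// back, so the promoted value matches what a direct i32 -> f16 conversion
// would produce. _Float16 is a Clang/GCC extension used only to make the
// FP_ROUND/FP_EXTEND pair concrete; its availability is target-dependent.
static float sitofpToHalfSketch(int X) {
  float Wide = float(X);         // SINT_TO_FP in the promoted type (NVT)
  return (float)(_Float16)Wide;  // FP_ROUND to VT, then FP_EXTEND back to NVT
}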
+ return DAG.getNode( + ISD::FP_EXTEND, DL, NVT, + DAG.getNode(ISD::FP_ROUND, DL, VT, NV, DAG.getIntPtrConstant(0, DL))); } SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 74f80db6d01b9..3ab9459c8af7b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -436,10 +436,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); - SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); - - return DAG.getNode(ISD::AssertZext, dl, - NVT, Res, DAG.getValueType(N->getValueType(0))); + return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); } SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { @@ -1374,6 +1371,11 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::OR: case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break; + case ISD::UMAX: + case ISD::SMAX: + case ISD::UMIN: + case ISD::SMIN: ExpandIntRes_MINMAX(N, Lo, Hi); break; + case ISD::ADD: case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break; @@ -1404,7 +1406,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { unsigned Opc = Node->getOpcode(); MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); return ExpandChainLibCall(LC, Node, false); @@ -1442,15 +1444,6 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt, } else if (Amt == NVTBits) { Lo = DAG.getConstant(0, DL, NVT); Hi = InL; - } else if (Amt == 1 && - TLI.isOperationLegalOrCustom(ISD::ADDC, - TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) { - // Emit this X << 1 as X+X. - SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); - SDValue LoOps[2] = { InL, InL }; - Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps); - SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; - Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps); } else { Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy)); Hi = DAG.getNode(ISD::OR, DL, NVT, @@ -1675,6 +1668,54 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { } } +static std::pair<ISD::CondCode, ISD::NodeType> getExpandedMinMaxOps(int Op) { + + switch (Op) { + default: llvm_unreachable("invalid min/max opcode"); + case ISD::SMAX: + return std::make_pair(ISD::SETGT, ISD::UMAX); + case ISD::UMAX: + return std::make_pair(ISD::SETUGT, ISD::UMAX); + case ISD::SMIN: + return std::make_pair(ISD::SETLT, ISD::UMIN); + case ISD::UMIN: + return std::make_pair(ISD::SETULT, ISD::UMIN); + } +} + +void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc DL(N); + ISD::NodeType LoOpc; + ISD::CondCode CondC; + std::tie(CondC, LoOpc) = getExpandedMinMaxOps(N->getOpcode()); + + // Expand the subcomponents. 
+ SDValue LHSL, LHSH, RHSL, RHSH; + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + + // Value types + EVT NVT = LHSL.getValueType(); + EVT CCT = getSetCCResultType(NVT); + + // Hi part is always the same op + Hi = DAG.getNode(N->getOpcode(), DL, {NVT, NVT}, {LHSH, RHSH}); + + // We need to know whether to select Lo part that corresponds to 'winning' + // Hi part or if Hi parts are equal. + SDValue IsHiLeft = DAG.getSetCC(DL, CCT, LHSH, RHSH, CondC); + SDValue IsHiEq = DAG.getSetCC(DL, CCT, LHSH, RHSH, ISD::SETEQ); + + // Lo part corresponding to the 'winning' Hi part + SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL); + + // Recursed Lo part if Hi parts are equal, this uses unsigned version + SDValue LoMinMax = DAG.getNode(LoOpc, DL, {NVT, NVT}, {LHSL, RHSL}); + + Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp); +} + void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); @@ -2006,9 +2047,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, SDValue Ptr = N->getBasePtr(); ISD::LoadExtType ExtType = N->getExtensionType(); unsigned Alignment = N->getAlignment(); - bool isVolatile = N->isVolatile(); - bool isNonTemporal = N->isNonTemporal(); - bool isInvariant = N->isInvariant(); + MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags(); AAMDNodes AAInfo = N->getAAInfo(); SDLoc dl(N); @@ -2017,9 +2056,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, if (N->getMemoryVT().bitsLE(NVT)) { EVT MemVT = N->getMemoryVT(); - Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), - MemVT, isVolatile, isNonTemporal, isInvariant, - Alignment, AAInfo); + Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), MemVT, + Alignment, MMOFlags, AAInfo); // Remember the chain. Ch = Lo.getValue(1); @@ -2041,8 +2079,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } } else if (DAG.getDataLayout().isLittleEndian()) { // Little-endian - low bits are at low addresses. - Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, isInvariant, Alignment, + Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), Alignment, MMOFlags, AAInfo); unsigned ExcessBits = @@ -2055,8 +2092,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, - isVolatile, isNonTemporal, isInvariant, - MinAlign(Alignment, IncrementSize), AAInfo); + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -2074,8 +2110,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), - isVolatile, isNonTemporal, isInvariant, Alignment, - AAInfo); + Alignment, MMOFlags, AAInfo); // Increment the pointer to the other half. 
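// The ExpandIntRes_MINMAX recipe above, instantiated for a 64-bit SMAX split
// into 32-bit halves: the Hi half uses the original signed op, and the Lo
// half comes from whichever operand won the Hi comparison, unless the Hi
// halves tie, in which case an unsigned min/max of the Lo halves decides.
#include <algorithm>
#include <cstdint>

struct ExpandedI64 { uint32_t Lo; int32_t Hi; };  // hypothetical expanded pair

static ExpandedI64 smax64Sketch(ExpandedI64 L, ExpandedI64 R) {
  bool IsHiLeft = L.Hi > R.Hi;                 // SETGT on the Hi parts
  bool IsHiEq   = L.Hi == R.Hi;
  uint32_t LoCmp    = IsHiLeft ? L.Lo : R.Lo;  // Lo of the 'winning' Hi part
  uint32_t LoMinMax = std::max(L.Lo, R.Lo);    // UMAX on the Lo parts
  int32_t  HiPart   = std::max(L.Hi, R.Hi);    // SMAX, same op as the source
  return {IsHiEq ? LoMinMax : LoCmp, HiPart};
}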
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -2084,8 +2119,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - isVolatile, isNonTemporal, isInvariant, - MinAlign(Alignment, IncrementSize), AAInfo); + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -2145,7 +2179,49 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, LC = RTLIB::MUL_I64; else if (VT == MVT::i128) LC = RTLIB::MUL_I128; - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); + + if (LC == RTLIB::UNKNOWN_LIBCALL) { + // We'll expand the multiplication by brute force because we have no other + // options. This is a trivially-generalized version of the code from + // Hacker's Delight (itself derived from Knuth's Algorithm M from section + // 4.3.1). + SDValue Mask = + DAG.getConstant(APInt::getLowBitsSet(NVT.getSizeInBits(), + NVT.getSizeInBits() >> 1), dl, NVT); + SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask); + SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask); + + SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL); + SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask); + + SDValue Shift = + DAG.getConstant(NVT.getSizeInBits() >> 1, dl, + TLI.getShiftAmountTy(NVT, DAG.getDataLayout())); + SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift); + SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift); + SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift); + + SDValue U = DAG.getNode(ISD::ADD, dl, NVT, + DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TH); + SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask); + SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift); + + SDValue V = DAG.getNode(ISD::ADD, dl, NVT, + DAG.getNode(ISD::MUL, dl, NVT, LLL, RLH), UL); + SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift); + + SDValue W = DAG.getNode(ISD::ADD, dl, NVT, + DAG.getNode(ISD::MUL, dl, NVT, LLH, RLH), + DAG.getNode(ISD::ADD, dl, NVT, UH, VH)); + Lo = DAG.getNode(ISD::ADD, dl, NVT, TL, + DAG.getNode(ISD::SHL, dl, NVT, V, Shift)); + + Hi = DAG.getNode(ISD::ADD, dl, NVT, W, + DAG.getNode(ISD::ADD, dl, NVT, + DAG.getNode(ISD::MUL, dl, NVT, RH, LL), + DAG.getNode(ISD::MUL, dl, NVT, RL, LH))); + return; + } SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first, @@ -2495,9 +2571,9 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue Temp = DAG.CreateStackTemporary(PtrVT); // Temporary for the overflow value, default it to zero.
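// The N x N -> 2N core of the brute-force expansion above, instantiated as a
// 64x64 -> 128-bit multiply using only 64-bit ops (Hacker's Delight mulhu,
// generalized). A and B play LL and RL; the patch then folds RH*LL + RL*LH
// into the returned Hi as well. Standalone sketch, not the patch itself.
#include <cstdint>

static void mul64x64To128(uint64_t A, uint64_t B, uint64_t &Lo, uint64_t &Hi) {
  const uint64_t Mask = 0xffffffffu;                   // low-half mask
  uint64_t T = (A & Mask) * (B & Mask);                // T = LLL * RLL
  uint64_t U = (A >> 32) * (B & Mask) + (T >> 32);     // U = LLH*RLL + TH
  uint64_t V = (A & Mask) * (B >> 32) + (U & Mask);    // V = LLL*RLH + UL
  Lo = (T & Mask) | (V << 32);                         // TL + (V << Shift)
  Hi = (A >> 32) * (B >> 32) + (U >> 32) + (V >> 32);  // LLH*RLH + UH + VH
}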
- SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, - DAG.getConstant(0, dl, PtrVT), Temp, - MachinePointerInfo(), false, false, 0); + SDValue Chain = + DAG.getStore(DAG.getEntryNode(), dl, DAG.getConstant(0, dl, PtrVT), Temp, + MachinePointerInfo()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -2522,14 +2598,14 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args), 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args)) .setSExtResult(); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); SplitInteger(CallInfo.first, Lo, Hi); - SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, - MachinePointerInfo(), false, false, false, 0); + SDValue Temp2 = + DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, MachinePointerInfo()); SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2, DAG.getConstant(0, dl, PtrVT), ISD::SETNE); @@ -2703,7 +2779,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - SDLoc dl) { + const SDLoc &dl) { SDValue LHSLo, LHSHi, RHSLo, RHSHi; GetExpandedInteger(NewLHS, LHSLo, LHSHi); GetExpandedInteger(NewRHS, RHSLo, RHSHi); @@ -2956,8 +3032,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); unsigned Alignment = N->getAlignment(); - bool isVolatile = N->isVolatile(); - bool isNonTemporal = N->isNonTemporal(); + MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags(); AAMDNodes AAInfo = N->getAAInfo(); SDLoc dl(N); SDValue Lo, Hi; @@ -2967,16 +3042,15 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { if (N->getMemoryVT().bitsLE(NVT)) { GetExpandedInteger(N->getValue(), Lo, Hi); return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), - N->getMemoryVT(), isVolatile, isNonTemporal, - Alignment, AAInfo); + N->getMemoryVT(), Alignment, MMOFlags, AAInfo); } if (DAG.getDataLayout().isLittleEndian()) { // Little-endian - low bits are at low addresses. GetExpandedInteger(N->getValue(), Lo, Hi); - Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, AAInfo); + Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags, + AAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2986,10 +3060,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); - Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, - N->getPointerInfo().getWithOffset(IncrementSize), - NEVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), AAInfo); + Hi = DAG.getTruncStore( + Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -3017,8 +3090,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { } // Store both the high bits and maybe some of the low bits. 
- Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), - HiVT, isVolatile, isNonTemporal, Alignment, AAInfo); + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), HiVT, Alignment, + MMOFlags, AAInfo); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -3027,8 +3100,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), AAInfo); + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -3104,7 +3176,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Fudge = DAG.getExtLoad( ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, - false, false, false, Alignment); + Alignment); return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 2a0b0aa447948..144bed241ee70 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -27,7 +27,7 @@ using namespace llvm; static cl::opt<bool> EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden); -/// PerformExpensiveChecks - Do extensive, expensive, sanity checking. +/// Do extensive, expensive, sanity checking. void DAGTypeLegalizer::PerformExpensiveChecks() { // If a node is not processed, then none of its values should be mapped by any // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues. @@ -174,9 +174,9 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { } } -/// run - This is the main entry point for the type legalizer. This does a -/// top-down traversal of the dag, legalizing types as it goes. Returns "true" -/// if it made any changes. +/// This is the main entry point for the type legalizer. This does a top-down +/// traversal of the dag, legalizing types as it goes. Returns "true" if it made +/// any changes. bool DAGTypeLegalizer::run() { bool Changed = false; @@ -204,7 +204,7 @@ bool DAGTypeLegalizer::run() { // Now that we have a set of nodes to process, handle them all. while (!Worklist.empty()) { -#ifndef XDEBUG +#ifndef EXPENSIVE_CHECKS if (EnableExpensiveChecks) #endif PerformExpensiveChecks(); @@ -394,7 +394,7 @@ NodeDone: } } -#ifndef XDEBUG +#ifndef EXPENSIVE_CHECKS if (EnableExpensiveChecks) #endif PerformExpensiveChecks(); @@ -461,11 +461,10 @@ NodeDone: return Changed; } -/// AnalyzeNewNode - The specified node is the root of a subtree of potentially -/// new nodes. Correct any processed operands (this may change the node) and -/// calculate the NodeId. If the node itself changes to a processed node, it -/// is not remapped - the caller needs to take care of this. -/// Returns the potentially changed node. +/// The specified node is the root of a subtree of potentially new nodes. +/// Correct any processed operands (this may change the node) and calculate the +/// NodeId. If the node itself changes to a processed node, it is not remapped - +/// the caller needs to take care of this. Returns the potentially changed node. SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { // If this was an existing node that is already done, we're done. 
if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed) @@ -536,7 +535,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { return N; } -/// AnalyzeNewValue - Call AnalyzeNewNode, updating the node in Val if needed. +/// Call AnalyzeNewNode, updating the node in Val if needed. /// If the node changes to a processed node, then remap it. void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) { Val.setNode(AnalyzeNewNode(Val.getNode())); @@ -545,7 +544,7 @@ void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) { RemapValue(Val); } -/// ExpungeNode - If N has a bogus mapping in ReplacedValues, eliminate it. +/// If N has a bogus mapping in ReplacedValues, eliminate it. /// This can occur when a node is deleted then reallocated as a new node - /// the mapping in ReplacedValues applies to the deleted node, not the new /// one. @@ -626,7 +625,7 @@ void DAGTypeLegalizer::ExpungeNode(SDNode *N) { ReplacedValues.erase(SDValue(N, i)); } -/// RemapValue - If the specified value was already legalized to another value, +/// If the specified value was already legalized to another value, /// replace it by that value. void DAGTypeLegalizer::RemapValue(SDValue &N) { DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N); @@ -643,8 +642,8 @@ void DAGTypeLegalizer::RemapValue(SDValue &N) { } namespace { - /// NodeUpdateListener - This class is a DAGUpdateListener that listens for - /// updates to nodes and recomputes their ready state. + /// This class is a DAGUpdateListener that listens for updates to nodes and + /// recomputes their ready state. class NodeUpdateListener : public SelectionDAG::DAGUpdateListener { DAGTypeLegalizer &DTL; SmallSetVector<SDNode*, 16> &NodesToAnalyze; @@ -689,9 +688,8 @@ namespace { } -/// ReplaceValueWith - The specified value was legalized to the specified other -/// value. Update the DAG and NodeIds replacing any uses of From to use To -/// instead. +/// The specified value was legalized to the specified other value. +/// Update the DAG and NodeIds replacing any uses of From to use To instead. void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { assert(From.getNode() != To.getNode() && "Potential legalization loop!"); @@ -905,15 +903,14 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { // Utilities. //===----------------------------------------------------------------------===// -/// BitConvertToInteger - Convert to an integer of the same size. +/// Convert to an integer of the same size. SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { unsigned BitWidth = Op.getValueType().getSizeInBits(); return DAG.getNode(ISD::BITCAST, SDLoc(Op), EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op); } -/// BitConvertVectorToIntegerVector - Convert to a vector of integers of the -/// same size. +/// Convert to a vector of integers of the same size. SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) { assert(Op.getValueType().isVector() && "Only applies to vectors!"); unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits(); @@ -930,15 +927,14 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, // the source and destination types. SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT); // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, - MachinePointerInfo(), false, false, 0); + SDValue Store = + DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, MachinePointerInfo()); // Result is a load from the stack slot. 
- return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(), - false, false, false, 0); + return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo()); } -/// CustomLowerNode - Replace the node's results with custom code provided -/// by the target and return "true", or do nothing and return "false". +/// Replace the node's results with custom code provided by the target and +/// return "true", or do nothing and return "false". /// The last parameter is FALSE if we are dealing with a node with legal /// result types and illegal operand. The second parameter denotes the type of /// illegal OperandNo in that case. @@ -981,8 +977,8 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) { } -/// CustomWidenLowerNode - Widen the node's results with custom code provided -/// by the target and return "true", or do nothing and return "false". +/// Widen the node's results with custom code provided by the target and return +/// "true", or do nothing and return "false". bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) { // See if the target wants to custom lower this node. if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom) @@ -992,7 +988,7 @@ bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) { TLI.ReplaceNodeResults(N, Results, DAG); if (Results.empty()) - // The target didn't want to custom widen lower its result after all. + // The target didn't want to custom widen lower its result after all. return false; // Update the widening map. @@ -1010,8 +1006,8 @@ SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) { return SDValue(N->getOperand(ResNo)); } -/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and -/// high parts of the given value. +/// Use ISD::EXTRACT_ELEMENT nodes to extract the low and high parts of the +/// given value. void DAGTypeLegalizer::GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi) { SDLoc dl(Pair); @@ -1038,7 +1034,7 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr); } -/// JoinIntegers - Build an integer with low bits Lo and high bits Hi. +/// Build an integer with low bits Lo and high bits Hi. SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { // Arbitrarily use dlHi for result SDLoc SDLoc dlHi(Hi); @@ -1056,7 +1052,7 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); } -/// LibCallify - Convert the node into a libcall with the same prototype. +/// Convert the node into a libcall with the same prototype. SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned) { unsigned NumOps = N->getNumOperands(); @@ -1080,12 +1076,11 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first; } -// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to -// ExpandLibCall except that the first operand is the in-chain. +/// Expand a node into a call to a libcall. Similar to ExpandLibCall except that +/// the first operand is the in-chain. 
std::pair<SDValue, SDValue> -DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, - SDNode *Node, - bool isSigned) { +DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, + bool isSigned) { SDValue InChain = Node->getOperand(0); TargetLowering::ArgListTy Args; @@ -1106,7 +1101,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -1114,9 +1109,9 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, return CallInfo; } -/// PromoteTargetBoolean - Promote the given target boolean to a target boolean -/// of the given type. A target boolean is an integer value, not necessarily of -/// type i1, the bits of which conform to getBooleanContents. +/// Promote the given target boolean to a target boolean of the given type. +/// A target boolean is an integer value, not necessarily of type i1, the bits +/// of which conform to getBooleanContents. /// /// ValVT is the type of values that produced the boolean. SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) { @@ -1127,9 +1122,9 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) { return DAG.getNode(ExtendCode, dl, BoolVT, Bool); } -/// WidenTargetBoolean - Widen the given target boolean to a target boolean -/// of the given type. The boolean vector is widened and then promoted to match -/// the target boolean type of the given ValVT. +/// Widen the given target boolean to a target boolean of the given type. +/// The boolean vector is widened and then promoted to match the target boolean +/// type of the given ValVT. SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT, bool WithZeroes) { SDLoc dl(Bool); @@ -1144,8 +1139,7 @@ SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT, return PromoteTargetBoolean(Bool, ValVT); } -/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT -/// bits in Hi. +/// Return the lower LoVT bits of Op in Lo and the upper HiVT bits in Hi. void DAGTypeLegalizer::SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, SDValue &Lo, SDValue &Hi) { @@ -1159,8 +1153,8 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op, Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); } -/// SplitInteger - Return the lower and upper halves of Op's bits in a value -/// type half the size of Op's. +/// Return the lower and upper halves of Op's bits in a value type half the +/// size of Op's. void DAGTypeLegalizer::SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi) { EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), @@ -1173,9 +1167,8 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op, // Entry Point //===----------------------------------------------------------------------===// -/// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that -/// only uses types natively supported by the target. Returns "true" if it made -/// any changes. +/// This transforms the SelectionDAG into a SelectionDAG that only uses types +/// natively supported by the target. Returns "true" if it made any changes. /// /// Note that this is an involved process that may invalidate pointers into /// the graph. 
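The SplitInteger/JoinIntegers contract documented above is easiest to see with concrete widths. Below is a standalone plain-C++ sketch of the same bit manipulation; the 64-to-2x32 widths, the function names, and the use of shifts in place of the TRUNCATE/SRL and extend/SHL/OR nodes the DAG versions emit are illustrative assumptions, not part of this patch:

    #include <cassert>
    #include <cstdint>

    // Mirror of SplitInteger: Lo gets the low half of Op, Hi the high half.
    static void splitInteger(uint64_t Op, uint32_t &Lo, uint32_t &Hi) {
      Lo = static_cast<uint32_t>(Op);        // TRUNCATE of Op
      Hi = static_cast<uint32_t>(Op >> 32);  // TRUNCATE of (Op SRL 32)
    }

    // Mirror of JoinIntegers: extend both halves, shift Hi into place, OR.
    static uint64_t joinIntegers(uint32_t Lo, uint32_t Hi) {
      return (static_cast<uint64_t>(Hi) << 32) | Lo;
    }

    int main() {
      uint32_t Lo, Hi;
      splitInteger(0x0123456789abcdefULL, Lo, Hi);
      assert(Lo == 0x89abcdefu && Hi == 0x01234567u);
      assert(joinIntegers(Lo, Hi) == 0x0123456789abcdefULL);
      return 0;
    }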
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 8ba19f76797f9..84ad8f83d9069 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -17,7 +17,6 @@ #define LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" @@ -26,58 +25,56 @@ namespace llvm { //===----------------------------------------------------------------------===// -/// DAGTypeLegalizer - This takes an arbitrary SelectionDAG as input and hacks -/// on it until only value types the target machine can handle are left. This -/// involves promoting small sizes to large sizes or splitting up large values -/// into small values. +/// This takes an arbitrary SelectionDAG as input and hacks on it until only +/// value types the target machine can handle are left. This involves promoting +/// small sizes to large sizes or splitting up large values into small values. /// class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { const TargetLowering &TLI; SelectionDAG &DAG; public: - // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information - // about the state of the node. The enum has all the values. + /// This pass uses the NodeId on the SDNodes to hold information about the + /// state of the node. The enum has all the values. enum NodeIdFlags { - /// ReadyToProcess - All operands have been processed, so this node is ready - /// to be handled. + /// All operands have been processed, so this node is ready to be handled. ReadyToProcess = 0, - /// NewNode - This is a new node, not before seen, that was created in the - /// process of legalizing some other node. + /// This is a new node, not before seen, that was created in the process of + /// legalizing some other node. NewNode = -1, - /// Unanalyzed - This node's ID needs to be set to the number of its - /// unprocessed operands. + /// This node's ID needs to be set to the number of its unprocessed + /// operands. Unanalyzed = -2, - /// Processed - This is a node that has already been processed. + /// This is a node that has already been processed. Processed = -3 // 1+ - This is a node which has this many unprocessed operands. }; private: - /// ValueTypeActions - This is a bitvector that contains two bits for each - /// simple value type, where the two bits correspond to the LegalizeAction - /// enum from TargetLowering. This can be queried with "getTypeAction(VT)". + /// This is a bitvector that contains two bits for each simple value type, + /// where the two bits correspond to the LegalizeAction enum from + /// TargetLowering. This can be queried with "getTypeAction(VT)". TargetLowering::ValueTypeActionImpl ValueTypeActions; - /// getTypeAction - Return how we should legalize values of this type. + /// Return how we should legalize values of this type. TargetLowering::LegalizeTypeAction getTypeAction(EVT VT) const { return TLI.getTypeAction(*DAG.getContext(), VT); } - /// isTypeLegal - Return true if this type is legal on this target. + /// Return true if this type is legal on this target. bool isTypeLegal(EVT VT) const { return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal; } - /// isSimpleLegalType - Return true if this is a simple legal type. + /// Return true if this is a simple legal type. 
bool isSimpleLegalType(EVT VT) const { return VT.isSimple() && TLI.isTypeLegal(VT); } - /// isLegalInHWReg - Return true if this type can be passed in registers. + /// Return true if this type can be passed in registers. /// For example, x86_64's f128 should be legal in registers, with /// only some operations converted to library calls or integer /// bitwise operations. @@ -90,51 +87,49 @@ private: return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } - /// IgnoreNodeResults - Pretend all of this node's results are legal. + /// Pretend all of this node's results are legal. bool IgnoreNodeResults(SDNode *N) const { return N->getOpcode() == ISD::TargetConstant; } - /// PromotedIntegers - For integer nodes that are below legal width, this map - /// indicates what promoted value to use. + /// For integer nodes that are below legal width, this map indicates what + /// promoted value to use. SmallDenseMap<SDValue, SDValue, 8> PromotedIntegers; - /// ExpandedIntegers - For integer nodes that need to be expanded this map - /// indicates which operands are the expanded version of the input. + /// For integer nodes that need to be expanded this map indicates which + /// operands are the expanded version of the input. SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedIntegers; - /// SoftenedFloats - For floating point nodes converted to integers of - /// the same size, this map indicates the converted value to use. + /// For floating-point nodes converted to integers of the same size, this map + /// indicates the converted value to use. SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats; - /// PromotedFloats - For floating point nodes that have a smaller precision - /// than the smallest supported precision, this map indicates what promoted - /// value to use. + /// For floating-point nodes that have a smaller precision than the smallest + /// supported precision, this map indicates what promoted value to use. SmallDenseMap<SDValue, SDValue, 8> PromotedFloats; - /// ExpandedFloats - For float nodes that need to be expanded this map - /// indicates which operands are the expanded version of the input. + /// For float nodes that need to be expanded this map indicates which operands + /// are the expanded version of the input. SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats; - /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the - /// scalar value of type 'ty' to use. + /// For nodes that are <1 x ty>, this map indicates the scalar value of type + /// 'ty' to use. SmallDenseMap<SDValue, SDValue, 8> ScalarizedVectors; - /// SplitVectors - For nodes that need to be split this map indicates - /// which operands are the expanded version of the input. + /// For nodes that need to be split this map indicates which operands are the + /// expanded version of the input. SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> SplitVectors; - /// WidenedVectors - For vector nodes that need to be widened, indicates - /// the widened value to use. + /// For vector nodes that need to be widened, indicates the widened value to + /// use. SmallDenseMap<SDValue, SDValue, 8> WidenedVectors; - /// ReplacedValues - For values that have been replaced with another, - /// indicates the replacement value to use. + /// For values that have been replaced with another, indicates the replacement + /// value to use. SmallDenseMap<SDValue, SDValue, 8> ReplacedValues; - /// Worklist - This defines a worklist of nodes to process.
In order to be - /// pushed onto this worklist, all operands of a node must have already been - /// processed. + /// This defines a worklist of nodes to process. In order to be pushed onto + /// this worklist, all operands of a node must have already been processed. SmallVector<SDNode*, 128> Worklist; public: @@ -145,7 +140,7 @@ public: "Too many value types for ValueTypeActions to hold!"); } - /// run - This is the main entry point for the type legalizer. This does a + /// This is the main entry point for the type legalizer. This does a /// top-down traversal of the dag, legalizing types as it goes. Returns /// "true" if it made any changes. bool run(); @@ -173,9 +168,9 @@ private: bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult); bool CustomWidenLowerNode(SDNode *N, EVT VT); - /// DisintegrateMERGE_VALUES - Replace each result of the given MERGE_VALUES - /// node with the corresponding input operand, except for the result 'ResNo', - /// for which the corresponding input operand is returned. + /// Replace each result of the given MERGE_VALUES node with the corresponding + /// input operand, except for the result 'ResNo', for which the corresponding + /// input operand is returned. SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo); SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index); @@ -201,9 +196,9 @@ private: // Integer Promotion Support: LegalizeIntegerTypes.cpp //===--------------------------------------------------------------------===// - /// GetPromotedInteger - Given a processed operand Op which was promoted to a - /// larger integer type, this returns the promoted value. The low bits of the - /// promoted value corresponding to the original type are exactly equal to Op. + /// Given a processed operand Op which was promoted to a larger integer type, + /// this returns the promoted value. The low bits of the promoted value + /// corresponding to the original type are exactly equal to Op. /// The extra bits contain rubbish, so the promoted value may need to be zero- /// or sign-extended from the original type before it is usable (the helpers /// SExtPromotedInteger and ZExtPromotedInteger can do this for you). @@ -218,8 +213,7 @@ private: } void SetPromotedInteger(SDValue Op, SDValue Result); - /// SExtPromotedInteger - Get a promoted operand and sign extend it to the - /// final size. + /// Get a promoted operand and sign extend it to the final size. SDValue SExtPromotedInteger(SDValue Op) { EVT OldVT = Op.getValueType(); SDLoc dl(Op); @@ -228,8 +222,7 @@ private: DAG.getValueType(OldVT)); } - /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the - /// final size. + /// Get a promoted operand and zero extend it to the final size. SDValue ZExtPromotedInteger(SDValue Op) { EVT OldVT = Op.getValueType(); SDLoc dl(Op); @@ -322,9 +315,9 @@ private: // Integer Expansion Support: LegalizeIntegerTypes.cpp //===--------------------------------------------------------------------===// - /// GetExpandedInteger - Given a processed operand Op which was expanded into - /// two integers of half the size, this returns the two halves. The low bits - /// of Op are exactly equal to the bits of Lo; the high bits exactly equal Hi. + /// Given a processed operand Op which was expanded into two integers of half + /// the size, this returns the two halves. The low bits of Op are exactly + /// equal to the bits of Lo; the high bits exactly equal Hi. 
/// For example, if Op is an i64 which was expanded into two i32's, then this /// method returns the two i32's, with Lo being equal to the lower 32 bits of /// Op, and Hi being equal to the upper 32 bits. @@ -362,6 +355,8 @@ private: void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_MINMAX (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -388,14 +383,14 @@ private: SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N); void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, - ISD::CondCode &CCCode, SDLoc dl); + ISD::CondCode &CCCode, const SDLoc &dl); //===--------------------------------------------------------------------===// // Float to Integer Conversion Support: LegalizeFloatTypes.cpp //===--------------------------------------------------------------------===// - /// GetSoftenedFloat - Given an operand Op of Float type, returns the integer - /// if the Op is not supported in target HW and converted to the integer. + /// Given an operand Op of Float type, returns the integer value it was + /// converted to when the Op's type is not supported by the target HW. /// The integer contains exactly the same bits as Op - only the type changed. /// For example, if Op is an f32 which was softened to an i32, then this method /// returns an i32, the bits of which coincide with those of Op. @@ -487,8 +482,8 @@ private: // Float Expansion Support: LegalizeFloatTypes.cpp //===--------------------------------------------------------------------===// - /// GetExpandedFloat - Given a processed operand Op which was expanded into - /// two floating point values of half the size, this returns the two halves. + /// Given a processed operand Op which was expanded into two floating-point + /// values of half the size, this returns the two halves. /// The low bits of Op are exactly equal to the bits of Lo; the high bits /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded /// into two f64's, then this method returns the two f64's, with Lo being @@ -542,8 +537,7 @@ private: SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo); void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, - ISD::CondCode &CCCode, SDLoc dl); - + ISD::CondCode &CCCode, const SDLoc &dl); //===--------------------------------------------------------------------===// // Float promotion support: LegalizeFloatTypes.cpp @@ -586,9 +580,9 @@ private: // Scalarization Support: LegalizeVectorTypes.cpp //===--------------------------------------------------------------------===// - /// GetScalarizedVector - Given a processed one-element vector Op which was - /// scalarized to its element type, this returns the element. For example, - /// if Op is a v1i32, Op = < i32 val >, this method returns val, an i32. + /// Given a processed one-element vector Op which was scalarized to its + /// element type, this returns the element. For example, if Op is a v1i32, + /// Op = < i32 val >, this method returns val, an i32.
SDValue GetScalarizedVector(SDValue Op) { SDValue &ScalarizedOp = ScalarizedVectors[Op]; RemapValue(ScalarizedOp); @@ -636,12 +630,12 @@ private: // Vector Splitting Support: LegalizeVectorTypes.cpp //===--------------------------------------------------------------------===// - /// GetSplitVector - Given a processed vector Op which was split into vectors - /// of half the size, this method returns the halves. The first elements of - /// Op coincide with the elements of Lo; the remaining elements of Op coincide - /// with the elements of Hi: Op is what you would get by concatenating Lo and - /// Hi. For example, if Op is a v8i32 that was split into two v4i32's, then - /// this method returns the two v4i32's, with Lo corresponding to the first 4 + /// Given a processed vector Op which was split into vectors of half the size, + /// this method returns the halves. The first elements of Op coincide with the + /// elements of Lo; the remaining elements of Op coincide with the elements of + /// Hi: Op is what you would get by concatenating Lo and Hi. + /// For example, if Op is a v8i32 that was split into two v4i32's, then this + /// method returns the two v4i32's, with Lo corresponding to the first 4 /// elements of Op, and Hi to the last 4 elements. void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi); void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi); @@ -653,6 +647,7 @@ private: void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -692,12 +687,12 @@ private: // Vector Widening Support: LegalizeVectorTypes.cpp //===--------------------------------------------------------------------===// - /// GetWidenedVector - Given a processed vector Op which was widened into a - /// larger vector, this method returns the larger vector. The elements of - /// the returned vector consist of the elements of Op followed by elements - /// containing rubbish. For example, if Op is a v2i32 that was widened to a - /// v4i32, then this method returns a v4i32 for which the first two elements - /// are the same as those of Op, while the last two elements contain rubbish. + /// Given a processed vector Op which was widened into a larger vector, this + /// method returns the larger vector. The elements of the returned vector + /// consist of the elements of Op followed by elements containing rubbish. + /// For example, if Op is a v2i32 that was widened to a v4i32, then this + /// method returns a v4i32 for which the first two elements are the same as + /// those of Op, while the last two elements contain rubbish. 
SDValue GetWidenedVector(SDValue Op) { SDValue &WidenedOp = WidenedVectors[Op]; RemapValue(WidenedOp); @@ -713,6 +708,7 @@ private: SDValue WidenVecRes_BUILD_VECTOR(SDNode* N); SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N); SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N); + SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N); SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); SDValue WidenVecRes_LOAD(SDNode* N); @@ -755,29 +751,29 @@ private: // Vector Widening Utilities Support: LegalizeVectorTypes.cpp //===--------------------------------------------------------------------===// - /// Helper GenWidenVectorLoads - Helper function to generate a set of - /// loads to load a vector with a resulting wider type. It takes + /// Helper function to generate a set of loads to load a vector with a + /// resulting wider type. It takes: /// LdChain: list of chains for the load to be generated. /// Ld: load to widen SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, LoadSDNode *LD); - /// GenWidenVectorExtLoads - Helper function to generate a set of extension - /// loads to load a ector with a resulting wider type. It takes + /// Helper function to generate a set of extension loads to load a vector with + /// a resulting wider type. It takes: /// LdChain: list of chains for the load to be generated. /// Ld: load to widen /// ExtType: extension element type SDValue GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, LoadSDNode *LD, ISD::LoadExtType ExtType); - /// Helper genWidenVectorStores - Helper function to generate a set of - /// stores to store a widen vector into non-widen memory + /// Helper function to generate a set of stores to store a widen vector into + /// non-widen memory. /// StChain: list of chains for the stores we have generated /// ST: store of a widen value void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); - /// Helper genWidenVectorTruncStores - Helper function to generate a set of - /// stores to store a truncate widen vector into non-widen memory + /// Helper function to generate a set of stores to store a truncate widen + /// vector into non-widen memory. /// StChain: list of chains for the stores we have generated /// ST: store of a widen value void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, @@ -785,8 +781,7 @@ private: /// Modifies a vector input (widens or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. - /// When FillWithZeroes is "on" the vector will be widened with - /// zeroes. + /// When FillWithZeroes is "on" the vector will be widened with zeroes. /// By default, the vector will be widened with undefined values. SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false); @@ -807,8 +802,8 @@ private: GetExpandedFloat(Op, Lo, Hi); } - /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and - /// high parts of the given value. + /// Use ISD::EXTRACT_ELEMENT nodes to extract the low and high parts of the + /// given value. void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi); // Generic Result Splitting.
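The widening contract described above for GetWidenedVector and ModifyToType (leading elements preserved, trailing lanes rubbish unless FillWithZeroes is set) can be modeled outside the DAG. A minimal sketch, with std::vector standing in for vector values and an explicit filler value standing in for genuinely undefined lanes; all names below are this sketch's own, not from the patch:

    #include <cstdint>
    #include <vector>

    // Resize In to WideNumElts elements, keeping the leading elements. New
    // trailing lanes are zero when FillWithZeroes is set, otherwise "undef"
    // (modeled here as a fixed filler; the real lanes are truly undefined).
    static std::vector<int32_t> modifyToType(const std::vector<int32_t> &In,
                                             size_t WideNumElts,
                                             bool FillWithZeroes = false) {
      const int32_t UndefFiller = 0x7FFFFFFF; // stand-in for undef lanes
      std::vector<int32_t> Out(WideNumElts, FillWithZeroes ? 0 : UndefFiller);
      for (size_t I = 0; I < In.size() && I < WideNumElts; ++I)
        Out[I] = In[I]; // narrowing keeps only the leading NVT elements
      return Out;
    }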
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 593c346df770e..665180e119b73 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -170,12 +170,10 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo, - false, false, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo); // Load the first half from the stack slot. - Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, - false, false, false, 0); + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; @@ -185,8 +183,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, - PtrInfo.getWithOffset(IncrementSize), false, - false, false, MinAlign(Alignment, IncrementSize)); + PtrInfo.getWithOffset(IncrementSize), + MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout())) @@ -263,16 +261,12 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); unsigned Alignment = LD->getAlignment(); - bool isVolatile = LD->isVolatile(); - bool isNonTemporal = LD->isNonTemporal(); - bool isInvariant = LD->isInvariant(); AAMDNodes AAInfo = LD->getAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); - Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), - isVolatile, isNonTemporal, isInvariant, Alignment, - AAInfo); + Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), Alignment, + LD->getMemOperand()->getFlags(), AAInfo); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -280,8 +274,8 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - isVolatile, isNonTemporal, isInvariant, - MinAlign(Alignment, IncrementSize), AAInfo); + MinAlign(Alignment, IncrementSize), + LD->getMemOperand()->getFlags(), AAInfo); // Build a factor node to remember that this load is independent of the // other one. 
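Both halves of the expanded load above use the same addressing pattern: the Hi half sits IncrementSize bytes past a pointer whose only known alignment is Alignment, so its alignment becomes MinAlign(Alignment, IncrementSize). A standalone sketch of that arithmetic; minAlign reimplements the llvm::MinAlign bit trick, while the struct and driver are illustrative only:

    #include <cassert>
    #include <cstdint>

    // Lowest set bit of A|B: for power-of-two inputs this is the largest
    // alignment guaranteed for an A-aligned address plus an offset of B.
    static uint64_t minAlign(uint64_t A, uint64_t B) {
      return (A | B) & (1 + ~(A | B));
    }

    struct HalfAccess {
      uint64_t Offset; // byte offset from the original pointer
      uint64_t Align;  // alignment known to hold at that offset
    };

    static void splitAccess(uint64_t NVTBits, uint64_t Alignment,
                            HalfAccess &Lo, HalfAccess &Hi) {
      uint64_t IncrementSize = NVTBits / 8; // byte size of each half
      Lo = {0, Alignment};
      Hi = {IncrementSize, minAlign(Alignment, IncrementSize)};
    }

    int main() {
      HalfAccess Lo, Hi;
      splitAccess(/*NVTBits=*/64, /*Alignment=*/16, Lo, Hi);
      assert(Lo.Offset == 0 && Lo.Align == 16);
      assert(Hi.Offset == 8 && Hi.Align == 8); // MinAlign(16, 8) == 8
      return 0;
    }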
@@ -478,8 +472,6 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { SDValue Chain = St->getChain(); SDValue Ptr = St->getBasePtr(); unsigned Alignment = St->getAlignment(); - bool isVolatile = St->isVolatile(); - bool isNonTemporal = St->isNonTemporal(); AAMDNodes AAInfo = St->getAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -491,15 +483,15 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) std::swap(Lo, Hi); - Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, AAInfo); + Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), Alignment, + St->getMemOperand()->getFlags(), AAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize), - isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), AAInfo); + MinAlign(Alignment, IncrementSize), + St->getMemOperand()->getFlags(), AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index f61f631e2ff8d..3c9cb17b58b27 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -358,8 +358,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case TargetLowering::Legal: break; case TargetLowering::Custom: { - SDValue Tmp1 = TLI.LowerOperation(Op, DAG); - if (Tmp1.getNode()) { + if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) { Result = Tmp1; break; } @@ -493,21 +492,26 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) { SDValue VectorLegalizer::ExpandLoad(SDValue Op) { - SDLoc dl(Op); LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); - SDValue Chain = LD->getChain(); - SDValue BasePTR = LD->getBasePtr(); - EVT SrcVT = LD->getMemoryVT(); - ISD::LoadExtType ExtType = LD->getExtensionType(); - SmallVector<SDValue, 8> Vals; - SmallVector<SDValue, 8> LoadChains; + EVT SrcVT = LD->getMemoryVT(); + EVT SrcEltVT = SrcVT.getScalarType(); unsigned NumElem = SrcVT.getVectorNumElements(); - EVT SrcEltVT = SrcVT.getScalarType(); - EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType(); + SDValue NewChain; + SDValue Value; if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { + SDLoc dl(Op); + + SmallVector<SDValue, 8> Vals; + SmallVector<SDValue, 8> LoadChains; + + EVT DstEltVT = LD->getValueType(0).getScalarType(); + SDValue Chain = LD->getChain(); + SDValue BasePTR = LD->getBasePtr(); + ISD::LoadExtType ExtType = LD->getExtensionType(); + // When elements in a vector are not byte-addressable, we cannot directly // load each element by advancing the pointer, which could only address bytes.
// Instead, we load all significant words, mask bits off, and concatenate @@ -531,24 +535,22 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { unsigned LoadBytes = WideBytes; if (RemainingBytes >= LoadBytes) { - ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR, - LD->getPointerInfo().getWithOffset(Offset), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), - MinAlign(LD->getAlignment(), Offset), - LD->getAAInfo()); + ScalarLoad = + DAG.getLoad(WideVT, dl, Chain, BasePTR, + LD->getPointerInfo().getWithOffset(Offset), + MinAlign(LD->getAlignment(), Offset), + LD->getMemOperand()->getFlags(), LD->getAAInfo()); } else { EVT LoadVT = WideVT; while (RemainingBytes < LoadBytes) { LoadBytes >>= 1; // Reduce the load size by half. LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3); } - ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, - LD->getPointerInfo().getWithOffset(Offset), - LoadVT, LD->isVolatile(), - LD->isNonTemporal(), LD->isInvariant(), - MinAlign(LD->getAlignment(), Offset), - LD->getAAInfo()); + ScalarLoad = + DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, + LD->getPointerInfo().getWithOffset(Offset), LoadVT, + MinAlign(LD->getAlignment(), Offset), + LD->getMemOperand()->getFlags(), LD->getAAInfo()); } RemainingBytes -= LoadBytes; @@ -614,29 +616,17 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { } Vals.push_back(Lo); } - } else { - unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; - for (unsigned Idx=0; Idx<NumElem; Idx++) { - SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl, - Op.getNode()->getValueType(0).getScalarType(), - Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), - SrcVT.getScalarType(), - LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), - MinAlign(LD->getAlignment(), Idx * Stride), LD->getAAInfo()); - - BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getConstant(Stride, dl, BasePTR.getValueType())); + NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); + Value = DAG.getNode(ISD::BUILD_VECTOR, dl, + Op.getNode()->getValueType(0), Vals); + } else { + SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG); - Vals.push_back(ScalarLoad.getValue(0)); - LoadChains.push_back(ScalarLoad.getValue(1)); - } + NewChain = Scalarized.getValue(1); + Value = Scalarized.getValue(0); } - SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); - SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, - Op.getNode()->getValueType(0), Vals); - AddLegalizedOperand(Op.getValue(0), Value); AddLegalizedOperand(Op.getValue(1), NewChain); @@ -644,54 +634,37 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { } SDValue VectorLegalizer::ExpandStore(SDValue Op) { - SDLoc dl(Op); StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); - SDValue Chain = ST->getChain(); - SDValue BasePTR = ST->getBasePtr(); - SDValue Value = ST->getValue(); - EVT StVT = ST->getMemoryVT(); - - unsigned Alignment = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); - AAMDNodes AAInfo = ST->getAAInfo(); - unsigned NumElem = StVT.getVectorNumElements(); - // The type of the data we want to save - EVT RegVT = Value.getValueType(); - EVT RegSclVT = RegVT.getScalarType(); - // The type of data as saved in memory. + EVT StVT = ST->getMemoryVT(); EVT MemSclVT = StVT.getScalarType(); - - // Cast floats into integers unsigned ScalarSize = MemSclVT.getSizeInBits(); // Round odd types to the next pow of two. 
- if (!isPowerOf2_32(ScalarSize)) - ScalarSize = NextPowerOf2(ScalarSize); - - // Store Stride in bytes - unsigned Stride = ScalarSize/8; - // Extract each of the elements from the original vector - // and save them into memory individually. - SmallVector<SDValue, 8> Stores; - for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, RegSclVT, Value, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - - // This scalar TruncStore may be illegal, but we legalize it later. - SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, - ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, - isVolatile, isNonTemporal, MinAlign(Alignment, Idx*Stride), - AAInfo); - - BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getConstant(Stride, dl, BasePTR.getValueType())); - - Stores.push_back(Store); + if (!isPowerOf2_32(ScalarSize)) { + // FIXME: This is completely broken and inconsistent with ExpandLoad + // handling. + + // For sub-byte element sizes, this ends up with 0 stride between elements, + // so the same element just gets re-written to the same location. There seem + // to be tests explicitly testing for this broken behavior though. + + LLVMContext &Ctx = *DAG.getContext(); + + EVT NewMemVT + = EVT::getVectorVT(Ctx, + MemSclVT.getIntegerVT(Ctx, NextPowerOf2(ScalarSize)), + StVT.getVectorNumElements()); + + SDValue NewVectorStore = DAG.getTruncStore( + ST->getChain(), SDLoc(Op), ST->getValue(), ST->getBasePtr(), + ST->getPointerInfo(), NewMemVT, ST->getAlignment(), + ST->getMemOperand()->getFlags(), ST->getAAInfo()); + ST = cast<StoreSDNode>(NewVectorStore.getNode()); } - SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); + + SDValue TF = TLI.scalarizeVectorStore(ST, DAG); AddLegalizedOperand(Op, TF); return TF; } @@ -864,10 +837,7 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) { int NumSrcElements = SrcVT.getVectorNumElements(); // Build up a zero vector to blend into this one. - EVT SrcScalarVT = SrcVT.getScalarType(); - SDValue ScalarZero = DAG.getTargetConstant(0, DL, SrcScalarVT); - SmallVector<SDValue, 4> BuildVectorOperands(NumSrcElements, ScalarZero); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands); + SDValue Zero = DAG.getConstant(0, DL, SrcVT); // Shuffle the incoming lanes into the correct position, and pull all other // lanes from the zero vector. @@ -885,16 +855,19 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) { DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask)); } -SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { - EVT VT = Op.getValueType(); - - // Generate a byte wise shuffle mask for the BSWAP. - SmallVector<int, 16> ShuffleMask; +static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) for (int J = ScalarSizeInBytes - 1; J >= 0; --J) ShuffleMask.push_back((I * ScalarSizeInBytes) + J); +} +SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { + EVT VT = Op.getValueType(); + + // Generate a byte wise shuffle mask for the BSWAP. + SmallVector<int, 16> ShuffleMask; + createBSWAPShuffleMask(VT, ShuffleMask); EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size()); // Only emit a shuffle if the mask is legal.
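The mask that the new createBSWAPShuffleMask helper builds is just a per-element byte reversal. Here is a runnable copy of the same loop with the EVT replaced by explicit counts; the free-standing name and int-based interface are this sketch's own:

    #include <cassert>
    #include <vector>

    // For NumElts elements of ScalarSizeInBytes bytes each, emit the byte
    // indices of each element in reverse order. v2i32 gives {3,2,1,0,7,6,5,4}.
    static std::vector<int> bswapShuffleMask(int NumElts,
                                             int ScalarSizeInBytes) {
      std::vector<int> Mask;
      for (int I = 0; I != NumElts; ++I)
        for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
          Mask.push_back(I * ScalarSizeInBytes + J);
      return Mask;
    }

    int main() {
      const std::vector<int> Expected{3, 2, 1, 0, 7, 6, 5, 4};
      assert(bswapShuffleMask(2, 4) == Expected);
      return 0;
    }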
@@ -903,8 +876,7 @@ SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { SDLoc DL(Op); Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); - Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), - ShuffleMask.data()); + Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask); return DAG.getNode(ISD::BITCAST, DL, VT, Op); } @@ -915,12 +887,36 @@ SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) { if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) return DAG.UnrollVectorOp(Op.getNode()); + // If the vector element width is a whole number of bytes, test if it's legal + // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte + // vector. This greatly reduces the number of bit shifts necessary. + unsigned ScalarSizeInBits = VT.getScalarSizeInBits(); + if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) { + SmallVector<int, 16> BSWAPMask; + createBSWAPShuffleMask(VT, BSWAPMask); + + EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size()); + if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) && + (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) || + (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) && + TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) { + SDLoc DL(Op); + Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); + Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), + BSWAPMask); + Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); + } + } + // If we have the appropriate vector bit operations, it is better to use them // than unrolling and expanding each component. if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) || !TLI.isOperationLegalOrCustom(ISD::SRL, VT) || - !TLI.isOperationLegalOrCustom(ISD::AND, VT) || - !TLI.isOperationLegalOrCustom(ISD::OR, VT)) + !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) || + !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) return DAG.UnrollVectorOp(Op.getNode()); // Let LegalizeDAG handle this later. @@ -1027,10 +1023,12 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { } SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) { - // If the non-ZERO_UNDEF version is supported we can let LegalizeDAG handle. + // If the non-ZERO_UNDEF version is supported we can use that instead. unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ; - if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) - return Op; + if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) { + SDLoc DL(Op); + return DAG.getNode(Opc, DL, Op.getValueType(), Op.getOperand(0)); + } // Otherwise go ahead and unroll.
return DAG.UnrollVectorOp(Op.getNode()); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index d0187d36dee21..f3adca49ccfe6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -223,17 +223,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); - SDValue Result = DAG.getLoad(ISD::UNINDEXED, - N->getExtensionType(), - N->getValueType(0).getVectorElementType(), - SDLoc(N), - N->getChain(), N->getBasePtr(), - DAG.getUNDEF(N->getBasePtr().getValueType()), - N->getPointerInfo(), - N->getMemoryVT().getVectorElementType(), - N->isVolatile(), N->isNonTemporal(), - N->isInvariant(), N->getOriginalAlignment(), - N->getAAInfo()); + SDValue Result = DAG.getLoad( + ISD::UNINDEXED, N->getExtensionType(), + N->getValueType(0).getVectorElementType(), SDLoc(N), N->getChain(), + N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()), + N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), + N->getOriginalAlignment(), N->getMemOperand()->getFlags(), + N->getAAInfo()); // Legalize the chain result - switch anything that used the old chain to // use the new one. @@ -370,7 +366,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { // Figure out if the scalar is the LHS or RHS and return it. SDValue Arg = N->getOperand(2).getOperand(0); - if (Arg.getOpcode() == ISD::UNDEF) + if (Arg.isUndef()) return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue(); return GetScalarizedVector(N->getOperand(Op)); @@ -476,16 +472,16 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { return false; } -/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs -/// to be scalarized, it must be <1 x ty>. Convert the element instead. +/// If the value to convert is a vector that needs to be scalarized, it must be +/// <1 x ty>. Convert the element instead. SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) { SDValue Elt = GetScalarizedVector(N->getOperand(0)); return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Elt); } -/// ScalarizeVecOp_UnaryOp - If the input is a vector that needs to be -/// scalarized, it must be <1 x ty>. Do the operation on the element instead. +/// If the input is a vector that needs to be scalarized, it must be <1 x ty>. +/// Do the operation on the element instead. SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { assert(N->getValueType(0).getVectorNumElements() == 1 && "Unexpected vector type!"); @@ -497,8 +493,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Op); } -/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one - -/// use a BUILD_VECTOR instead. +/// The vectors to concatenate have length one - use a BUILD_VECTOR instead. 
SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { SmallVector<SDValue, 8> Ops(N->getNumOperands()); for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) @@ -506,9 +501,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Ops); } -/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to -/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the -/// index. +/// If the input is a vector that needs to be scalarized, it must be <1 x ty>, +/// so just return the element, ignoring the index. SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue Res = GetScalarizedVector(N->getOperand(0)); if (Res.getValueType() != N->getValueType(0)) @@ -518,8 +512,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { } -/// ScalarizeVecOp_VSELECT - If the input condition is a vector that needs to be -/// scalarized, it must be <1 x i1>, so just convert to a normal ISD::SELECT +/// If the input condition is a vector that needs to be scalarized, it must be +/// <1 x i1>, so just convert to a normal ISD::SELECT /// (still with vector output type since that was acceptable if we got here). SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) { SDValue ScalarCond = GetScalarizedVector(N->getOperand(0)); @@ -529,29 +523,28 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) { N->getOperand(2)); } -/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be -/// scalarized, it must be <1 x ty>. Just store the element. +/// If the value to store is a vector that needs to be scalarized, it must be +/// <1 x ty>. Just store the element. SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ assert(N->isUnindexed() && "Indexed store of one-element vector?"); assert(OpNo == 1 && "Do not know how to scalarize this operand!"); SDLoc dl(N); if (N->isTruncatingStore()) - return DAG.getTruncStore(N->getChain(), dl, - GetScalarizedVector(N->getOperand(1)), - N->getBasePtr(), N->getPointerInfo(), - N->getMemoryVT().getVectorElementType(), - N->isVolatile(), N->isNonTemporal(), - N->getAlignment(), N->getAAInfo()); + return DAG.getTruncStore( + N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), + N->getBasePtr(), N->getPointerInfo(), + N->getMemoryVT().getVectorElementType(), N->getAlignment(), + N->getMemOperand()->getFlags(), N->getAAInfo()); return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getPointerInfo(), - N->isVolatile(), N->isNonTemporal(), - N->getOriginalAlignment(), N->getAAInfo()); + N->getOriginalAlignment(), N->getMemOperand()->getFlags(), + N->getAAInfo()); } -/// ScalarizeVecOp_FP_ROUND - If the value to round is a vector that needs -/// to be scalarized, it must be <1 x ty>. Convert the element instead. +/// If the value to round is a vector that needs to be scalarized, it must be +/// <1 x ty>. Convert the element instead. 
SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) { SDValue Elt = GetScalarizedVector(N->getOperand(0)); SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N), @@ -564,11 +557,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) { // Result Vector Splitting //===----------------------------------------------------------------------===// -/// SplitVectorResult - This method is called when the specified result of the -/// specified node is found to need vector splitting. At this point, the node -/// may also have invalid operands or may have other results that need -/// legalization, we just know that (at least) one result needs vector -/// splitting. +/// This method is called when the specified result of the specified node is +/// found to need vector splitting. At this point, the node may also have +/// invalid operands or may have other results that need legalization, we just +/// know that (at least) one result needs vector splitting. void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { DEBUG(dbgs() << "Split node result: "; N->dump(&DAG); @@ -621,6 +613,12 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: + SplitVecRes_ExtVecInRegOp(N, Lo, Hi); + break; + case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CONVERT_RNDSAT: @@ -664,6 +662,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::ADD: case ISD::SUB: case ISD::MUL: + case ISD::MULHS: + case ISD::MULHU: case ISD::FADD: case ISD::FSUB: case ISD::FMUL: @@ -845,23 +845,41 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDLoc dl(N); GetSplitVector(Vec, Lo, Hi); - // Spill the vector to the stack. EVT VecVT = Vec.getValueType(); - EVT SubVecVT = VecVT.getVectorElementType(); + EVT VecElemVT = VecVT.getVectorElementType(); + unsigned VecElems = VecVT.getVectorNumElements(); + unsigned SubElems = SubVec.getValueType().getVectorNumElements(); + + // If we know the index is 0, and we know the subvector doesn't cross the + // boundary between the halves, we can avoid spilling the vector, and insert + // into the lower half of the split vector directly. + // TODO: The IdxVal == 0 constraint is artificial, we could do this whenever + // the index is constant and there is no boundary crossing. But those cases + // don't seem to get hit in practice. + if (ConstantSDNode *ConstIdx = dyn_cast<ConstantSDNode>(Idx)) { + unsigned IdxVal = ConstIdx->getZExtValue(); + if ((IdxVal == 0) && (IdxVal + SubElems <= VecElems / 2)) { + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx); + return; + } + } + + // Spill the vector to the stack. SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, - MachinePointerInfo(), false, false, 0); + SDValue Store = + DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); // Store the new subvector into the specified index. 
- SDValue SubVecPtr = GetVectorElementPointer(StackPtr, SubVecVT, Idx); + SDValue SubVecPtr = GetVectorElementPointer(StackPtr, VecElemVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); - Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo(), - false, false, 0); + Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo()); // Load the Lo part from the stack slot. - Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - false, false, false, 0); + Lo = + DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo()); // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; @@ -871,7 +889,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - false, false, false, MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize)); } void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, @@ -917,6 +935,39 @@ void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, DAG.getValueType(HiVT)); } +void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + unsigned Opcode = N->getOpcode(); + SDValue N0 = N->getOperand(0); + + SDLoc dl(N); + SDValue InLo, InHi; + GetSplitVector(N0, InLo, InHi); + EVT InLoVT = InLo.getValueType(); + unsigned InNumElements = InLoVT.getVectorNumElements(); + + EVT OutLoVT, OutHiVT; + std::tie(OutLoVT, OutHiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + unsigned OutNumElements = OutLoVT.getVectorNumElements(); + assert((2 * OutNumElements) <= InNumElements && + "Illegal extend vector in reg split"); + + // *_EXTEND_VECTOR_INREG instructions extend the lowest elements of the + // input vector (i.e. we only use InLo): + // OutLo will extend the first OutNumElements from InLo. + // OutHi will extend the next OutNumElements from InLo. + + // Shuffle the elements from InLo for OutHi into the bottom elements to + // create a 'fake' InHi. + SmallVector<int, 8> SplitHi(InNumElements, -1); + for (unsigned i = 0; i != OutNumElements; ++i) + SplitHi[i] = i + OutNumElements; + InHi = DAG.getVectorShuffle(InLoVT, dl, InLo, DAG.getUNDEF(InLoVT), SplitHi); + + Lo = DAG.getNode(Opcode, dl, OutLoVT, InLo); + Hi = DAG.getNode(Opcode, dl, OutHiVT, InHi); +} + void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); @@ -947,20 +998,20 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, EVT VecVT = Vec.getValueType(); EVT EltVT = VecVT.getVectorElementType(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, - MachinePointerInfo(), false, false, 0); + SDValue Store = + DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); // Store the new element. This may be larger than the vector element type, // so use a truncating store. 
SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); - Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT, - false, false, 0); + Store = + DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT); // Load the Lo part from the stack slot. - Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - false, false, false, 0); + Lo = + DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo()); // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; @@ -970,7 +1021,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - false, false, false, MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize)); } void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, @@ -995,25 +1046,21 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT MemoryVT = LD->getMemoryVT(); unsigned Alignment = LD->getOriginalAlignment(); - bool isVolatile = LD->isVolatile(); - bool isNonTemporal = LD->isNonTemporal(); - bool isInvariant = LD->isInvariant(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, - LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, - isInvariant, Alignment, AAInfo); + LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, - LD->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment, - AAInfo); + LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT, + Alignment, MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -1062,7 +1109,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MLD->getPointerInfo(), + getMachineMemOperand(MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); @@ -1074,7 +1121,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); MMO = DAG.getMachineFunction(). - getMachineMemOperand(MLD->getPointerInfo(), + getMachineMemOperand(MLD->getPointerInfo(), MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); @@ -1131,7 +1178,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); MachineMemOperand *MMO = DAG.getMachineFunction(). 
- getMachineMemOperand(MGT->getPointerInfo(), + getMachineMemOperand(MGT->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); @@ -1362,7 +1409,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue Op1 = InputUsed[1] == -1U ? DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]]; // At least one input vector was used. Create a new shuffle vector. - Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]); + Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, Ops); } Ops.clear(); @@ -1374,10 +1421,10 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, // Operand Vector Splitting //===----------------------------------------------------------------------===// -/// SplitVectorOperand - This method is called when the specified operand of the -/// specified node is found to need vector splitting. At this point, all of the -/// result types of the node are known to be legal, but other operands of the -/// node may need legalization as well as the specified one. +/// This method is called when the specified operand of the specified node is +/// found to need vector splitting. At this point, all of the result types of +/// the node are known to be legal, but other operands of the node may need +/// legalization as well as the specified one. bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG); @@ -1600,13 +1647,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Store the vector to the stack. SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, - MachinePointerInfo(), false, false, 0); + SDValue Store = + DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); // Load back the required element. StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, - MachinePointerInfo(), EltVT, false, false, false, 0); + MachinePointerInfo(), EltVT); } SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, @@ -1646,7 +1693,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MGT->getPointerInfo(), + getMachineMemOperand(MGT->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); @@ -1655,7 +1702,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, OpsLo, MMO); MMO = DAG.getMachineFunction(). - getMachineMemOperand(MGT->getPointerInfo(), + getMachineMemOperand(MGT->getPointerInfo(), MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); @@ -1688,7 +1735,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, EVT MemoryVT = N->getMemoryVT(); unsigned Alignment = N->getOriginalAlignment(); SDLoc DL(N); - + EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); @@ -1717,7 +1764,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, SDValue Lo, Hi; MachineMemOperand *MMO = DAG.getMachineFunction(). 
- getMachineMemOperand(N->getPointerInfo(), + getMachineMemOperand(N->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); @@ -1729,7 +1776,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); MMO = DAG.getMachineFunction(). - getMachineMemOperand(N->getPointerInfo(), + getMachineMemOperand(N->getPointerInfo(), MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment, N->getAAInfo(), N->getRanges()); @@ -1778,7 +1825,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, SDValue Lo, Hi; MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(N->getPointerInfo(), + getMachineMemOperand(N->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); @@ -1787,7 +1834,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, DL, OpsLo, MMO); MMO = DAG.getMachineFunction(). - getMachineMemOperand(N->getPointerInfo(), + getMachineMemOperand(N->getPointerInfo(), MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); @@ -1810,8 +1857,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { SDValue Ptr = N->getBasePtr(); EVT MemoryVT = N->getMemoryVT(); unsigned Alignment = N->getOriginalAlignment(); - bool isVol = N->isVolatile(); - bool isNT = N->isNonTemporal(); + MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags(); AAMDNodes AAInfo = N->getAAInfo(); SDValue Lo, Hi; GetSplitVector(N->getOperand(1), Lo, Hi); @@ -1822,11 +1868,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned IncrementSize = LoMemVT.getSizeInBits()/8; if (isTruncating) - Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), - LoMemVT, isVol, isNT, Alignment, AAInfo); + Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT, + Alignment, MMOFlags, AAInfo); else - Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), - isVol, isNT, Alignment, AAInfo); + Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags, + AAInfo); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, @@ -1835,11 +1881,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { if (isTruncating) Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVol, isNT, Alignment, AAInfo); + HiMemVT, Alignment, MMOFlags, AAInfo); else Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), - isVol, isNT, Alignment, AAInfo); + Alignment, MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } @@ -1889,7 +1935,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { EVT OutVT = N->getValueType(0); unsigned NumElements = OutVT.getVectorNumElements(); bool IsFloat = OutVT.isFloatingPoint(); - + // Widening should have already made sure this is a power-two vector // if we're trying to split it at all. assert() that's true, just in case. 
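// Sketch of how the Hi half's pointer offset and alignment interact in the
// split stores above. MinAlign is modeled here as the largest power of two
// dividing both the base alignment and the offset (assumes power-of-two
// alignments; a simplified stand-in, not the LLVM helper itself).
#include <cstdint>
uint64_t minAlignModel(uint64_t Align, uint64_t Offset) {
  uint64_t V = Align | Offset;
  return V & (~V + 1); // lowest set bit of the union
}
// Usage: splitting a 16-byte-aligned <4 x i32> store in half gives
// IncrementSize = 2 * 32 / 8 = 8 bytes, so the Hi half lands at
// BasePtr + 8 with alignment minAlignModel(16, 8) == 8.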
assert(!(NumElements & 1) && "Splitting vector, but not in half!"); @@ -2069,6 +2115,12 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Shift(N); break; + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: + Res = WidenVecRes_EXTEND_VECTOR_INREG(N); + break; + case ISD::ANY_EXTEND: case ISD::FP_EXTEND: case ISD::FP_ROUND: @@ -2355,6 +2407,61 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops); } +SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) { + unsigned Opcode = N->getOpcode(); + SDValue InOp = N->getOperand(0); + SDLoc DL(N); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT WidenSVT = WidenVT.getVectorElementType(); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + EVT InVT = InOp.getValueType(); + EVT InSVT = InVT.getVectorElementType(); + unsigned InVTNumElts = InVT.getVectorNumElements(); + + if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { + InOp = GetWidenedVector(InOp); + InVT = InOp.getValueType(); + if (InVT.getSizeInBits() == WidenVT.getSizeInBits()) { + switch (Opcode) { + case ISD::ANY_EXTEND_VECTOR_INREG: + return DAG.getAnyExtendVectorInReg(InOp, DL, WidenVT); + case ISD::SIGN_EXTEND_VECTOR_INREG: + return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT); + case ISD::ZERO_EXTEND_VECTOR_INREG: + return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT); + } + } + } + + // Unroll, extend the scalars and rebuild the vector. + SmallVector<SDValue, 16> Ops; + for (unsigned i = 0, e = std::min(InVTNumElts, WidenNumElts); i != e; ++i) { + SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InSVT, InOp, + DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + switch (Opcode) { + case ISD::ANY_EXTEND_VECTOR_INREG: + Val = DAG.getNode(ISD::ANY_EXTEND, DL, WidenSVT, Val); + break; + case ISD::SIGN_EXTEND_VECTOR_INREG: + Val = DAG.getNode(ISD::SIGN_EXTEND, DL, WidenSVT, Val); + break; + case ISD::ZERO_EXTEND_VECTOR_INREG: + Val = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenSVT, Val); + break; + default: + llvm_unreachable("A *_EXTEND_VECTOR_INREG node was expected"); + } + Ops.push_back(Val); + } + + while (Ops.size() != WidenNumElts) + Ops.push_back(DAG.getUNDEF(WidenSVT)); + + return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops); +} + SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) { // If this is an FCOPYSIGN with same input types, we can treat it as a // normal (can trap) binary op. @@ -2546,7 +2653,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { // The inputs and the result are widen to the same value. 
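// Standalone model of the unroll path in WidenVecRes_EXTEND_VECTOR_INREG
// above: extend the low input lanes one scalar at a time, then pad the
// widened result with undef (modeled here as 0). Plain C++ over i32/i64
// lanes; names are illustrative, not LLVM API.
#include <algorithm>
#include <cstdint>
#include <vector>
std::vector<int64_t> widenSExtInRegModel(const std::vector<int32_t> &In,
                                         unsigned WidenNumElts) {
  std::vector<int64_t> Out;
  unsigned N = std::min<unsigned>(In.size(), WidenNumElts);
  for (unsigned i = 0; i != N; ++i)
    Out.push_back(static_cast<int64_t>(In[i])); // per-lane SIGN_EXTEND
  while (Out.size() != WidenNumElts)
    Out.push_back(0); // stands in for DAG.getUNDEF(WidenSVT)
  return Out;
}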
unsigned i; for (i=1; i < NumOperands; ++i) - if (N->getOperand(i).getOpcode() != ISD::UNDEF) + if (!N->getOperand(i).isUndef()) break; if (i == NumOperands) @@ -2564,7 +2671,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { return DAG.getVectorShuffle(WidenVT, dl, GetWidenedVector(N->getOperand(0)), GetWidenedVector(N->getOperand(1)), - &MaskOps[0]); + MaskOps); } } } @@ -2744,7 +2851,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { - + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0)); SDValue Mask = N->getMask(); EVT MaskVT = Mask.getValueType(); @@ -2898,7 +3005,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { } for (unsigned i = NumElts; i != WidenNumElts; ++i) NewMask.push_back(-1); - return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]); + return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask); } SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { @@ -3072,9 +3179,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { - // Since the result is legal and the input is illegal, it is unlikely - // that we can fix the input to a legal type so unroll the convert - // into some scalar code and create a nasty build vector. + // Since the result is legal and the input is illegal, it is unlikely that we + // can fix the input to a legal type so unroll the convert into some scalar + // code and create a nasty build vector. EVT VT = N->getValueType(0); EVT EltVT = VT.getVectorElementType(); SDLoc dl(N); @@ -3161,7 +3268,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { - // We have to widen the value but we want only to store the original + // We have to widen the value, but we want only to store the original // vector type. StoreSDNode *ST = cast<StoreSDNode>(N); @@ -3189,10 +3296,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) Mask = GetWidenedVector(Mask); else { - // The mask should be widened as well + // The mask should be widened as well. EVT BoolVT = getSetCCResultType(WideVal.getValueType()); // We can't use ModifyToType() because we should fill the mask with - // zeroes + // zeroes. unsigned WidenNumElts = BoolVT.getVectorNumElements(); unsigned MaskNumElts = MaskVT.getVectorNumElements(); @@ -3219,16 +3326,16 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { SDValue DataOp = MSC->getValue(); SDValue Mask = MSC->getMask(); - // Widen the value + // Widen the value. SDValue WideVal = GetWidenedVector(DataOp); EVT WideVT = WideVal.getValueType(); unsigned NumElts = WideVal.getValueType().getVectorNumElements(); SDLoc dl(N); - // The mask should be widened as well + // The mask should be widened as well. Mask = WidenTargetBoolean(Mask, WideVT, true); - // Widen index + // Widen index. SDValue Index = MSC->getIndex(); EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(), Index.getValueType().getScalarType(), @@ -3293,7 +3400,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, if (Width == WidenEltWidth) return RetVT; - // See if there is larger legal integer than the element type to load/store + // See if there is larger legal integer than the element type to load/store. 
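// The mask padding used when widening a shuffle (WidenVecRes_VECTOR_SHUFFLE
// above): original lane picks are kept and every lane beyond the source
// length becomes undef (-1). Minimal sketch, not the LLVM API.
#include <vector>
std::vector<int> widenShuffleMask(std::vector<int> Mask,
                                  unsigned WidenNumElts) {
  while (Mask.size() != WidenNumElts)
    Mask.push_back(-1); // undef lane
  return Mask; // e.g. {1, 0} widened to 4 lanes -> {1, 0, -1, -1}
}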
unsigned VT; for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE; VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { @@ -3355,7 +3462,7 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, NumElts = Width / NewLdTy.getSizeInBits(); NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts); VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp); - // Readjust position and vector position based on new load type + // Readjust position and vector position based on new load type. Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits(); LdTy = NewLdTy; } @@ -3368,8 +3475,8 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, LoadSDNode *LD) { - // The strategy assumes that we can efficiently load powers of two widths. - // The routines chops the vector into the largest vector loads with the same + // The strategy assumes that we can efficiently load power-of-two widths. + // The routine chops the vector into the largest vector loads with the same // element type or scalar loads and then recombines it to the widen vector // type. EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); @@ -3380,27 +3487,24 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); // Load information - SDValue Chain = LD->getChain(); - SDValue BasePtr = LD->getBasePtr(); - unsigned Align = LD->getAlignment(); - bool isVolatile = LD->isVolatile(); - bool isNonTemporal = LD->isNonTemporal(); - bool isInvariant = LD->isInvariant(); + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + unsigned Align = LD->getAlignment(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); int LdWidth = LdVT.getSizeInBits(); - int WidthDiff = WidenWidth - LdWidth; // Difference - unsigned LdAlign = (isVolatile) ? 0 : Align; // Allow wider loads + int WidthDiff = WidenWidth - LdWidth; + unsigned LdAlign = LD->isVolatile() ? 0 : Align; // Allow wider loads. // Find the vector type that can load from. EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); int NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), - isVolatile, isNonTemporal, isInvariant, Align, - AAInfo); + Align, MMOFlags, AAInfo); LdChain.push_back(LdOp.getValue(1)); - // Check if we can load the element with one instruction + // Check if we can load the element with one instruction. if (LdWidth <= NewVTWidth) { if (!NewVT.isVector()) { unsigned NumElts = WidenWidth / NewVTWidth; @@ -3421,7 +3525,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps); } - // Load vector by using multiple loads from largest vector to scalar + // Load vector by using multiple loads from largest vector to scalar. SmallVector<SDValue, 16> LdOps; LdOps.push_back(LdOp); @@ -3436,13 +3540,12 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, SDValue L; if (LdWidth < NewVTWidth) { - // Our current type we are using is too large, find a better size + // The current type we are using is too large. Find a better size. 
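// Rough model of the chopping loop in GenWidenVectorLoads above: repeatedly
// emit the widest load the remaining width allows. findMemWidthModel is a
// deliberately simplified stand-in for FindMemType (it ignores alignment
// and the WidthDiff slack) that picks the largest power of two that fits.
#include <vector>
static int findMemWidthModel(int Remaining) {
  int W = 1;
  while (W * 2 <= Remaining)
    W *= 2;
  return W;
}
std::vector<int> chopLoadWidths(int LdWidth) {
  std::vector<int> Widths;
  while (LdWidth > 0) {
    int W = findMemWidthModel(LdWidth);
    Widths.push_back(W); // one load of W bits at the running offset
    LdWidth -= W;
  }
  return Widths; // e.g. a 96-bit wide value -> {64, 32}
}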
NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); NewVTWidth = NewVT.getSizeInBits(); L = DAG.getLoad(NewVT, dl, Chain, BasePtr, - LD->getPointerInfo().getWithOffset(Offset), isVolatile, - isNonTemporal, isInvariant, MinAlign(Align, Increment), - AAInfo); + LD->getPointerInfo().getWithOffset(Offset), + MinAlign(Align, Increment), MMOFlags, AAInfo); LdChain.push_back(L.getValue(1)); if (L->getValueType(0).isVector()) { SmallVector<SDValue, 16> Loads; @@ -3456,9 +3559,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, } } else { L = DAG.getLoad(NewVT, dl, Chain, BasePtr, - LD->getPointerInfo().getWithOffset(Offset), isVolatile, - isNonTemporal, isInvariant, MinAlign(Align, Increment), - AAInfo); + LD->getPointerInfo().getWithOffset(Offset), + MinAlign(Align, Increment), MMOFlags, AAInfo); LdChain.push_back(L.getValue(1)); } @@ -3468,33 +3570,33 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, LdWidth -= NewVTWidth; } - // Build the vector from the loads operations + // Build the vector from the load operations. unsigned End = LdOps.size(); if (!LdOps[0].getValueType().isVector()) // All the loads are scalar loads. return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End); // If the load contains vectors, build the vector using concat vector. - // All of the vectors used to loads are power of 2 and the scalars load - // can be combined to make a power of 2 vector. + // All of the vectors used to load are power-of-2, and the scalar loads can be + // combined to make a power-of-2 vector. SmallVector<SDValue, 16> ConcatOps(End); int i = End - 1; int Idx = End; EVT LdTy = LdOps[i].getValueType(); - // First combine the scalar loads to a vector + // First, combine the scalar loads to a vector. if (!LdTy.isVector()) { for (--i; i >= 0; --i) { LdTy = LdOps[i].getValueType(); if (LdTy.isVector()) break; } - ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End); + ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i + 1, End); } ConcatOps[--Idx] = LdOps[i]; for (--i; i >= 0; --i) { EVT NewLdTy = LdOps[i].getValueType(); if (NewLdTy != LdTy) { - // Create a larger vector + // Create a larger vector. ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy, makeArrayRef(&ConcatOps[Idx], End - Idx)); Idx = End - 1; @@ -3503,11 +3605,11 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, ConcatOps[--Idx] = LdOps[i]; } - if (WidenWidth == LdTy.getSizeInBits()*(End - Idx)) + if (WidenWidth == LdTy.getSizeInBits() * (End - Idx)) return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, makeArrayRef(&ConcatOps[Idx], End - Idx)); - // We need to fill the rest with undefs to build the vector + // We need to fill the rest with undefs to build the vector. unsigned NumOps = WidenWidth / LdTy.getSizeInBits(); SmallVector<SDValue, 16> WidenOps(NumOps); SDValue UndefVal = DAG.getUNDEF(LdTy); @@ -3526,33 +3628,30 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, LoadSDNode *LD, ISD::LoadExtType ExtType) { // For extension loads, it may not be more efficient to chop up the vector - // and then extended it. Instead, we unroll the load and build a new vector. + // and then extend it. Instead, we unroll the load and build a new vector. 
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); EVT LdVT = LD->getMemoryVT(); SDLoc dl(LD); assert(LdVT.isVector() && WidenVT.isVector()); // Load information - SDValue Chain = LD->getChain(); - SDValue BasePtr = LD->getBasePtr(); - unsigned Align = LD->getAlignment(); - bool isVolatile = LD->isVolatile(); - bool isNonTemporal = LD->isNonTemporal(); - bool isInvariant = LD->isInvariant(); + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + unsigned Align = LD->getAlignment(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); EVT EltVT = WidenVT.getVectorElementType(); EVT LdEltVT = LdVT.getVectorElementType(); unsigned NumElts = LdVT.getVectorNumElements(); - // Load each element and widen + // Load each element and widen. unsigned WidenNumElts = WidenVT.getVectorNumElements(); SmallVector<SDValue, 16> Ops(WidenNumElts); unsigned Increment = LdEltVT.getSizeInBits() / 8; - Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, - LD->getPointerInfo(), - LdEltVT, isVolatile, isNonTemporal, isInvariant, - Align, AAInfo); + Ops[0] = + DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(), + LdEltVT, Align, MMOFlags, AAInfo); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { @@ -3562,12 +3661,11 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, BasePtr.getValueType())); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, - isVolatile, isNonTemporal, isInvariant, Align, - AAInfo); + Align, MMOFlags, AAInfo); LdChain.push_back(Ops[i].getValue(1)); } - // Fill the rest with undefs + // Fill the rest with undefs. SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i != WidenNumElts; ++i) Ops[i] = UndefVal; @@ -3578,14 +3676,13 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST) { - // The strategy assumes that we can efficiently store powers of two widths. - // The routines chops the vector into the largest vector stores with the same + // The strategy assumes that we can efficiently store power-of-two widths. + // The routine chops the vector into the largest vector stores with the same // element type or scalar stores. SDValue Chain = ST->getChain(); SDValue BasePtr = ST->getBasePtr(); unsigned Align = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); SDLoc dl(ST); @@ -3601,7 +3698,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, int Idx = 0; // current index to store unsigned Offset = 0; // offset from base to store while (StWidth != 0) { - // Find the largest vector type we can store with + // Find the largest vector type we can store with. 
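// Layout model for GenWidenVectorExtLoads above: element i of the narrow
// in-memory vector sits at BasePtr + i * (LdEltBits / 8), is loaded with an
// extending load, and lanes past NumElts are filled with undef. Minimal
// sketch; assumes LdEltBits is a multiple of 8.
#include <cstdint>
uint64_t extLoadElementOffset(unsigned i, unsigned LdEltBits) {
  uint64_t Increment = LdEltBits / 8; // bytes per in-memory element
  return uint64_t(i) * Increment;     // e.g. i8 elements: 0, 1, 2, ...
}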
EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT); unsigned NewVTWidth = NewVT.getSizeInBits(); unsigned Increment = NewVTWidth / 8; @@ -3611,10 +3708,9 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, SDValue EOp = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, - ST->getPointerInfo().getWithOffset(Offset), - isVolatile, isNonTemporal, - MinAlign(Align, Offset), AAInfo)); + StChain.push_back(DAG.getStore( + Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), + MinAlign(Align, Offset), MMOFlags, AAInfo)); StWidth -= NewVTWidth; Offset += Increment; Idx += NumVTElts; @@ -3623,28 +3719,27 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, BasePtr.getValueType())); } while (StWidth != 0 && StWidth >= NewVTWidth); } else { - // Cast the vector to the scalar type we can store + // Cast the vector to the scalar type we can store. unsigned NumElts = ValWidth / NewVTWidth; EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp); - // Readjust index position based on new vector type + // Readjust index position based on new vector type. Idx = Idx * ValEltWidth / NewVTWidth; do { SDValue EOp = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, - ST->getPointerInfo().getWithOffset(Offset), - isVolatile, isNonTemporal, - MinAlign(Align, Offset), AAInfo)); + StChain.push_back(DAG.getStore( + Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), + MinAlign(Align, Offset), MMOFlags, AAInfo)); StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getConstant(Increment, dl, BasePtr.getValueType())); } while (StWidth != 0 && StWidth >= NewVTWidth); - // Restore index back to be relative to the original widen element type + // Restore index back to be relative to the original widen element type. Idx = Idx * NewVTWidth / ValEltWidth; } } @@ -3654,27 +3749,25 @@ void DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST) { // For extension loads, it may not be more efficient to truncate the vector - // and then store it. Instead, we extract each element and then store it. - SDValue Chain = ST->getChain(); - SDValue BasePtr = ST->getBasePtr(); + // and then store it. Instead, we extract each element and then store it. + SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); unsigned Align = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); - SDValue ValOp = GetWidenedVector(ST->getValue()); + SDValue ValOp = GetWidenedVector(ST->getValue()); SDLoc dl(ST); EVT StVT = ST->getMemoryVT(); EVT ValVT = ValOp.getValueType(); - // It must be true that we the widen vector type is bigger than where - // we need to store. + // It must be true that the wide vector type is bigger than where we need to + // store. 
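// The index rescaling in the scalar fallback above (Idx * ValEltWidth /
// NewVTWidth, and its inverse on the way back out): a position counted in
// original element widths must be recounted when the same bits are viewed
// with a different element width. Minimal illustration:
unsigned rescaleIndex(unsigned Idx, unsigned OldEltBits, unsigned NewEltBits) {
  // e.g. element 2 of a v4i64 viewed as v8i32: 2 * 64 / 32 == index 4
  return Idx * OldEltBits / NewEltBits;
}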
assert(StVT.isVector() && ValOp.getValueType().isVector()); assert(StVT.bitsLT(ValOp.getValueType())); - // For truncating stores, we can not play the tricks of chopping legal - // vector types and bit cast it to the right type. Instead, we unroll - // the store. + // For truncating stores, we can not play the tricks of chopping legal vector + // types and bitcast it to the right type. Instead, we unroll the store. EVT StEltVT = StVT.getVectorElementType(); EVT ValEltVT = ValVT.getVectorElementType(); unsigned Increment = ValEltVT.getSizeInBits() / 8; @@ -3683,9 +3776,8 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, - ST->getPointerInfo(), StEltVT, - isVolatile, isNonTemporal, Align, - AAInfo)); + ST->getPointerInfo(), StEltVT, Align, + MMOFlags, AAInfo)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), @@ -3695,10 +3787,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, SDValue EOp = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, - ST->getPointerInfo().getWithOffset(Offset), - StEltVT, isVolatile, isNonTemporal, - MinAlign(Align, Offset), AAInfo)); + StChain.push_back(DAG.getTruncStore( + Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), + StEltVT, MinAlign(Align, Offset), MMOFlags, AAInfo)); } } diff --git a/lib/CodeGen/SelectionDAG/Makefile b/lib/CodeGen/SelectionDAG/Makefile deleted file mode 100644 index ea716fdaabbab..0000000000000 --- a/lib/CodeGen/SelectionDAG/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -##===- lib/CodeGen/SelectionDAG/Makefile -------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. 
-LIBRARYNAME = LLVMSelectionDAG - -include $(LEVEL)/Makefile.common diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 622e06f0da2aa..1e5c4a73693f3 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -37,7 +37,7 @@ static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable use of DFA during scheduling")); -static cl::opt<signed> RegPressureThreshold( +static cl::opt<int> RegPressureThreshold( "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5), cl::desc("Track reg pressure and switch priority to in-depth")); @@ -323,8 +323,8 @@ void ResourcePriorityQueue::reserveResources(SUnit *SU) { } } -signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { - signed RegBalance = 0; +int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { + int RegBalance = 0; if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) return RegBalance; @@ -357,8 +357,8 @@ signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { /// The RawPressure flag makes this function to ignore /// existing reg file sizes, and report raw def/use /// balance. -signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) { - signed RegBalance = 0; +int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) { + int RegBalance = 0; if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) return RegBalance; @@ -398,9 +398,9 @@ static const unsigned FactorOne = 2; /// Returns single number reflecting benefit of scheduling SU /// in the current cycle. -signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { +int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { // Initial trivial priority. - signed ResCount = 1; + int ResCount = 1; // Do not waste time on a node that is already scheduled. if (SU->isScheduled) @@ -601,7 +601,7 @@ SUnit *ResourcePriorityQueue::pop() { std::vector<SUnit *>::iterator Best = Queue.begin(); if (!DisableDFASched) { - signed BestCost = SUSchedulingCost(*Best); + int BestCost = SUSchedulingCost(*Best); for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) { diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index c27f8de601f24..237d541b4cb97 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -14,9 +14,9 @@ #ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H #define LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H -#include "llvm/ADT/SmallVector.h" #include "llvm/IR/DebugLoc.h" #include "llvm/Support/DataTypes.h" +#include <utility> namespace llvm { @@ -56,7 +56,8 @@ public: // Constructor for non-constants. SDDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool indir, uint64_t off, DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(indir) { + : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O), + IsIndirect(indir) { kind = SDNODE; u.s.Node = N; u.s.ResNo = R; @@ -65,7 +66,8 @@ public: // Constructor for constants. 
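// The SDDbgValue constructors in this hunk keep taking DebugLoc by value but
// now std::move it into the member instead of copying it again: the usual
// "sink argument" idiom, where rvalue callers pay a single move and lvalue
// callers a copy plus a move. Minimal illustration with a hypothetical type:
#include <string>
#include <utility>
struct Tagged {
  std::string Name;
  explicit Tagged(std::string N) : Name(std::move(N)) {} // move into place
};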
SDDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t off, DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(false) { + : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O), + IsIndirect(false) { kind = CONST; u.Const = C; } @@ -73,7 +75,8 @@ public: // Constructor for frame indices. SDDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(false) { + : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O), + IsIndirect(false) { kind = FRAMEIX; u.FrameIx = FI; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 91024e672f9c3..802c459a02237 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -336,8 +336,8 @@ void ScheduleDAGRRList::Schedule() { // Build the scheduling graph. BuildSchedGraph(nullptr); - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this)); + DEBUG(for (SUnit &SU : SUnits) + SU.dumpAll(this)); Topo.InitDAGTopologicalSorting(); AvailableQueue->initNodes(SUnits); @@ -1027,43 +1027,37 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { SmallVector<SDep, 4> LoadPreds; SmallVector<SDep, 4> NodePreds; SmallVector<SDep, 4> NodeSuccs; - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) - ChainPreds.push_back(*I); - else if (isOperandOf(I->getSUnit(), LoadNode)) - LoadPreds.push_back(*I); + for (SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) + ChainPreds.push_back(Pred); + else if (isOperandOf(Pred.getSUnit(), LoadNode)) + LoadPreds.push_back(Pred); else - NodePreds.push_back(*I); + NodePreds.push_back(Pred); } - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isCtrl()) - ChainSuccs.push_back(*I); + for (SDep &Succ : SU->Succs) { + if (Succ.isCtrl()) + ChainSuccs.push_back(Succ); else - NodeSuccs.push_back(*I); + NodeSuccs.push_back(Succ); } // Now assign edges to the newly-created nodes. - for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) { - const SDep &Pred = ChainPreds[i]; + for (const SDep &Pred : ChainPreds) { RemovePred(SU, Pred); if (isNewLoad) AddPred(LoadSU, Pred); } - for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { - const SDep &Pred = LoadPreds[i]; + for (const SDep &Pred : LoadPreds) { RemovePred(SU, Pred); if (isNewLoad) AddPred(LoadSU, Pred); } - for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { - const SDep &Pred = NodePreds[i]; + for (const SDep &Pred : NodePreds) { RemovePred(SU, Pred); AddPred(NewSU, Pred); } - for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { - SDep D = NodeSuccs[i]; + for (SDep D : NodeSuccs) { SUnit *SuccDep = D.getSUnit(); D.setSUnit(SU); RemovePred(SuccDep, D); @@ -1074,8 +1068,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { && !D.isCtrl() && NewSU->NumRegDefsLeft > 0) --NewSU->NumRegDefsLeft; } - for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { - SDep D = ChainSuccs[i]; + for (SDep D : ChainSuccs) { SUnit *SuccDep = D.getSUnit(); D.setSUnit(SU); RemovePred(SuccDep, D); @@ -1108,29 +1101,27 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { NewSU = CreateClone(SU); // New SUnit has the exact same predecessors. 
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) - if (!I->isArtificial()) - AddPred(NewSU, *I); + for (SDep &Pred : SU->Preds) + if (!Pred.isArtificial()) + AddPred(NewSU, Pred); // Only copy scheduled successors. Cut them from old node's successor // list and move them over. SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isArtificial()) + for (SDep &Succ : SU->Succs) { + if (Succ.isArtificial()) continue; - SUnit *SuccSU = I->getSUnit(); + SUnit *SuccSU = Succ.getSUnit(); if (SuccSU->isScheduled) { - SDep D = *I; + SDep D = Succ; D.setSUnit(NewSU); AddPred(SuccSU, D); D.setSUnit(SU); DelDeps.push_back(std::make_pair(SuccSU, D)); } } - for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) - RemovePred(DelDeps[i].first, DelDeps[i].second); + for (auto &DelDep : DelDeps) + RemovePred(DelDep.first, DelDep.second); AvailableQueue->updateNode(SU); AvailableQueue->addNode(NewSU); @@ -1156,16 +1147,15 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, // Only copy scheduled successors. Cut them from old node's successor // list and move them over. SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isArtificial()) + for (SDep &Succ : SU->Succs) { + if (Succ.isArtificial()) continue; - SUnit *SuccSU = I->getSUnit(); + SUnit *SuccSU = Succ.getSUnit(); if (SuccSU->isScheduled) { - SDep D = *I; + SDep D = Succ; D.setSUnit(CopyToSU); AddPred(SuccSU, D); - DelDeps.push_back(std::make_pair(SuccSU, *I)); + DelDeps.push_back(std::make_pair(SuccSU, Succ)); } else { // Avoid scheduling the def-side copy before other successors. Otherwise @@ -1174,8 +1164,8 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial)); } } - for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) - RemovePred(DelDeps[i].first, DelDeps[i].second); + for (auto &DelDep : DelDeps) + RemovePred(DelDep.first, DelDep.second); SDep FromDep(SU, SDep::Data, Reg); FromDep.setLatency(SU->Latency); @@ -1400,16 +1390,14 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // All candidates are delayed due to live physical reg dependencies. // Try backtracking, code duplication, or inserting cross class copies // to resolve it. - for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { - SUnit *TrySU = Interferences[i]; + for (SUnit *TrySU : Interferences) { SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; // Try unscheduling up to the point where it's safe to schedule // this node. 
SUnit *BtSU = nullptr; unsigned LiveCycle = UINT_MAX; - for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { - unsigned Reg = LRegs[j]; + for (unsigned Reg : LRegs) { if (LiveRegGens[Reg]->getHeight() < LiveCycle) { BtSU = LiveRegGens[Reg]; LiveCycle = BtSU->getHeight(); @@ -1854,10 +1842,9 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { return SethiUllmanNumber; unsigned Extra = 0; - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) continue; // ignore chain preds - SUnit *PredSU = I->getSUnit(); + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; // ignore chain preds + SUnit *PredSU = Pred.getSUnit(); unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); if (PredSethiUllman > SethiUllmanNumber) { SethiUllmanNumber = PredSethiUllman; @@ -1879,8 +1866,8 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { void RegReductionPQBase::CalculateSethiUllmanNumbers() { SethiUllmanNumbers.assign(SUnits->size(), 0); - for (unsigned i = 0, e = SUnits->size(); i != e; ++i) - CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); + for (const SUnit &SU : *SUnits) + CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers); } void RegReductionPQBase::addNode(const SUnit *SU) { @@ -1956,11 +1943,10 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { if (!TLI) return false; - for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; - SUnit *PredSU = I->getSUnit(); + SUnit *PredSU = Pred.getSUnit(); // NumRegDefsLeft is zero when enough uses of this node have been scheduled // to cover the number of registers defined (they are all live). if (PredSU->NumRegDefsLeft == 0) { @@ -2006,11 +1992,10 @@ bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { LiveUses = 0; int PDiff = 0; - for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; - SUnit *PredSU = I->getSUnit(); + SUnit *PredSU = Pred.getSUnit(); // NumRegDefsLeft is zero when enough uses of this node have been scheduled // to cover the number of registers defined (they are all live). if (PredSU->NumRegDefsLeft == 0) { @@ -2050,11 +2035,10 @@ void RegReductionPQBase::scheduledNode(SUnit *SU) { if (!SU->getNode()) return; - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; - SUnit *PredSU = I->getSUnit(); + SUnit *PredSU = Pred.getSUnit(); // NumRegDefsLeft is zero when enough uses of this node have been scheduled // to cover the number of registers defined (they are all live). if (PredSU->NumRegDefsLeft == 0) { @@ -2132,11 +2116,10 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) { return; } - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; - SUnit *PredSU = I->getSUnit(); + SUnit *PredSU = Pred.getSUnit(); // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only // counts data deps. 
if (PredSU->NumSuccsLeft != PredSU->Succs.size()) @@ -2201,15 +2184,14 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) { /// closest to the current cycle. static unsigned closestSucc(const SUnit *SU) { unsigned MaxHeight = 0; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isCtrl()) continue; // ignore chain succs - unsigned Height = I->getSUnit()->getHeight(); + for (const SDep &Succ : SU->Succs) { + if (Succ.isCtrl()) continue; // ignore chain succs + unsigned Height = Succ.getSUnit()->getHeight(); // If there are bunch of CopyToRegs stacked up, they should be considered // to be at the same position. - if (I->getSUnit()->getNode() && - I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) - Height = closestSucc(I->getSUnit())+1; + if (Succ.getSUnit()->getNode() && + Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) + Height = closestSucc(Succ.getSUnit())+1; if (Height > MaxHeight) MaxHeight = Height; } @@ -2220,9 +2202,8 @@ static unsigned closestSucc(const SUnit *SU) { /// for scratch registers, i.e. number of data dependencies. static unsigned calcMaxScratches(const SUnit *SU) { unsigned Scratches = 0; - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) continue; // ignore chain preds + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; // ignore chain preds Scratches++; } return Scratches; @@ -2232,10 +2213,9 @@ static unsigned calcMaxScratches(const SUnit *SU) { /// CopyFromReg from a virtual register. static bool hasOnlyLiveInOpers(const SUnit *SU) { bool RetVal = false; - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) continue; - const SUnit *PredSU = I->getSUnit(); + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; + const SUnit *PredSU = Pred.getSUnit(); if (PredSU->getNode() && PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { unsigned Reg = @@ -2255,10 +2235,9 @@ static bool hasOnlyLiveInOpers(const SUnit *SU) { /// it has no other use. It should be scheduled closer to the terminator. 
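// The recurring cleanup through this file: explicit pred_/succ_iterator
// loops become range-based for over the same SDep lists. The shape of the
// change, extracted for clarity (no new API involved):
//
//   // before
//   for (SUnit::const_pred_iterator I = SU->Preds.begin(),
//        E = SU->Preds.end(); I != E; ++I) {
//     if (I->isCtrl()) continue;   // ignore chain preds
//     visit(I->getSUnit());
//   }
//
//   // after
//   for (const SDep &Pred : SU->Preds) {
//     if (Pred.isCtrl()) continue; // ignore chain preds
//     visit(Pred.getSUnit());
//   }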
static bool hasOnlyLiveOutUses(const SUnit *SU) { bool RetVal = false; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isCtrl()) continue; - const SUnit *SuccSU = I->getSUnit(); + for (const SDep &Succ : SU->Succs) { + if (Succ.isCtrl()) continue; + const SUnit *SuccSU = Succ.getSUnit(); if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { unsigned Reg = cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); @@ -2293,10 +2272,9 @@ static void initVRegCycle(SUnit *SU) { SU->isVRegCycle = true; - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) continue; - I->getSUnit()->isVRegCycle = true; + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; + Pred.getSUnit()->isVRegCycle = true; } } @@ -2306,14 +2284,13 @@ static void resetVRegCycle(SUnit *SU) { if (!SU->isVRegCycle) return; - for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) continue; // ignore chain preds - SUnit *PredSU = I->getSUnit(); + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; // ignore chain preds + SUnit *PredSU = Pred.getSUnit(); if (PredSU->isVRegCycle) { assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg && "VRegCycle def must be CopyFromReg"); - I->getSUnit()->isVRegCycle = 0; + Pred.getSUnit()->isVRegCycle = false; } } } @@ -2325,11 +2302,10 @@ static bool hasVRegCycleUse(const SUnit *SU) { if (SU->isVRegCycle) return false; - for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) continue; // ignore chain preds - if (I->getSUnit()->isVRegCycle && - I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; // ignore chain preds + if (Pred.getSUnit()->isVRegCycle && + Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); return true; } @@ -2684,11 +2660,9 @@ void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { CalculateSethiUllmanNumbers(); // For single block loops, mark nodes that look like canonical IV increments. 
- if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) { - for (unsigned i = 0, e = sunits.size(); i != e; ++i) { - initVRegCycle(&sunits[i]); - } - } + if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) + for (SUnit &SU : sunits) + initVRegCycle(&SU); } //===----------------------------------------------------------------------===// @@ -2726,16 +2700,15 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, if(!ImpDefs && !RegMask) return false; - for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end(); - SI != SE; ++SI) { - SUnit *SuccSU = SI->getSUnit(); - for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(), - PE = SuccSU->Preds.end(); PI != PE; ++PI) { - if (!PI->isAssignedRegDep()) + for (const SDep &Succ : SU->Succs) { + SUnit *SuccSU = Succ.getSUnit(); + for (const SDep &SuccPred : SuccSU->Preds) { + if (!SuccPred.isAssignedRegDep()) continue; - if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) && - scheduleDAG->IsReachable(DepSU, PI->getSUnit())) + if (RegMask && + MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) && + scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit())) return true; if (ImpDefs) @@ -2743,8 +2716,8 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, // Return true if SU clobbers this physical register use and the // definition of the register reaches from DepSU. IsReachable queries // a topological forward sort of the DAG (following the successors). - if (TRI->regsOverlap(*ImpDef, PI->getReg()) && - scheduleDAG->IsReachable(DepSU, PI->getSUnit())) + if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) && + scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit())) return true; } } @@ -2823,19 +2796,18 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, /// void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // Visit all the nodes in topological order, working top-down. - for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { - SUnit *SU = &(*SUnits)[i]; + for (SUnit &SU : *SUnits) { // For now, only look at nodes with no data successors, such as stores. // These are especially important, due to the heuristics in // getNodePriority for nodes with no data successors. - if (SU->NumSuccs != 0) + if (SU.NumSuccs != 0) continue; // For now, only look at nodes with exactly one data predecessor. - if (SU->NumPreds != 1) + if (SU.NumPreds != 1) continue; // Avoid prescheduling copies to virtual registers, which don't behave // like other nodes from the perspective of scheduling heuristics. - if (SDNode *N = SU->getNode()) + if (SDNode *N = SU.getNode()) if (N->getOpcode() == ISD::CopyToReg && TargetRegisterInfo::isVirtualRegister (cast<RegisterSDNode>(N->getOperand(1))->getReg())) @@ -2843,10 +2815,9 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // Locate the single data predecessor. SUnit *PredSU = nullptr; - for (SUnit::const_pred_iterator II = SU->Preds.begin(), - EE = SU->Preds.end(); II != EE; ++II) - if (!II->isCtrl()) { - PredSU = II->getSUnit(); + for (const SDep &Pred : SU.Preds) + if (!Pred.isCtrl()) { + PredSU = Pred.getSUnit(); break; } assert(PredSU); @@ -2860,44 +2831,43 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { continue; // Avoid prescheduling to copies from virtual registers, which don't behave // like other nodes from the perspective of scheduling heuristics. 
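// Companion pattern to the range-for cleanup, seen in the hunks around
// here: loops that previously indexed SUnits and held a SUnit *SU now
// iterate by reference, so member access uses '.' and pointer-taking call
// sites spell &SU explicitly:
//
//   for (SUnit &SU : *SUnits) {
//     if (SU.NumSuccs != 0)   // was SU->NumSuccs
//       continue;
//     preschedule(&SU);       // was preschedule(SU)
//   }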
- if (SDNode *N = SU->getNode()) + if (SDNode *N = SU.getNode()) if (N->getOpcode() == ISD::CopyFromReg && TargetRegisterInfo::isVirtualRegister (cast<RegisterSDNode>(N->getOperand(1))->getReg())) continue; // Perform checks on the successors of PredSU. - for (SUnit::const_succ_iterator II = PredSU->Succs.begin(), - EE = PredSU->Succs.end(); II != EE; ++II) { - SUnit *PredSuccSU = II->getSUnit(); - if (PredSuccSU == SU) continue; + for (const SDep &PredSucc : PredSU->Succs) { + SUnit *PredSuccSU = PredSucc.getSUnit(); + if (PredSuccSU == &SU) continue; // If PredSU has another successor with no data successors, for // now don't attempt to choose either over the other. if (PredSuccSU->NumSuccs == 0) goto outer_loop_continue; // Don't break physical register dependencies. - if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs) - if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI)) + if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs) + if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI)) goto outer_loop_continue; // Don't introduce graph cycles. - if (scheduleDAG->IsReachable(SU, PredSuccSU)) + if (scheduleDAG->IsReachable(&SU, PredSuccSU)) goto outer_loop_continue; } // Ok, the transformation is safe and the heuristics suggest it is // profitable. Update the graph. - DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum + DEBUG(dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #" << PredSU->NodeNum << " to guide scheduling in the presence of multiple uses\n"); for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { SDep Edge = PredSU->Succs[i]; assert(!Edge.isAssignedRegDep()); SUnit *SuccSU = Edge.getSUnit(); - if (SuccSU != SU) { + if (SuccSU != &SU) { Edge.setSUnit(PredSU); scheduleDAG->RemovePred(SuccSU, Edge); - scheduleDAG->AddPred(SU, Edge); - Edge.setSUnit(SU); + scheduleDAG->AddPred(&SU, Edge); + Edge.setSUnit(&SU); scheduleDAG->AddPred(SuccSU, Edge); --i; } @@ -2914,16 +2884,15 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { /// If both are two-address, but one is commutable while the other is not /// commutable, favor the one that's not commutable. 
void RegReductionPQBase::AddPseudoTwoAddrDeps() { - for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { - SUnit *SU = &(*SUnits)[i]; - if (!SU->isTwoAddress) + for (SUnit &SU : *SUnits) { + if (!SU.isTwoAddress) continue; - SDNode *Node = SU->getNode(); - if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode()) + SDNode *Node = SU.getNode(); + if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode()) continue; - bool isLiveOut = hasOnlyLiveOutUses(SU); + bool isLiveOut = hasOnlyLiveOutUses(&SU); unsigned Opc = Node->getMachineOpcode(); const MCInstrDesc &MCID = TII->get(Opc); unsigned NumRes = MCID.getNumDefs(); @@ -2931,21 +2900,22 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { for (unsigned j = 0; j != NumOps; ++j) { if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1) continue; - SDNode *DU = SU->getNode()->getOperand(j).getNode(); + SDNode *DU = SU.getNode()->getOperand(j).getNode(); if (DU->getNodeId() == -1) continue; const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; - if (!DUSU) continue; - for (SUnit::const_succ_iterator I = DUSU->Succs.begin(), - E = DUSU->Succs.end(); I != E; ++I) { - if (I->isCtrl()) continue; - SUnit *SuccSU = I->getSUnit(); - if (SuccSU == SU) + if (!DUSU) + continue; + for (const SDep &Succ : DUSU->Succs) { + if (Succ.isCtrl()) + continue; + SUnit *SuccSU = Succ.getSUnit(); + if (SuccSU == &SU) continue; // Be conservative. Ignore if nodes aren't at roughly the same // depth and height. - if (SuccSU->getHeight() < SU->getHeight() && - (SU->getHeight() - SuccSU->getHeight()) > 1) + if (SuccSU->getHeight() < SU.getHeight() && + (SU.getHeight() - SuccSU->getHeight()) > 1) continue; // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge // constrains whatever is using the copy, instead of the copy @@ -2961,8 +2931,8 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { continue; // Don't constrain nodes with physical register defs if the // predecessor can clobber them. - if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) { - if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) + if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) { + if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI)) continue; } // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; @@ -2972,14 +2942,14 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { SuccOpc == TargetOpcode::INSERT_SUBREG || SuccOpc == TargetOpcode::SUBREG_TO_REG) continue; - if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) && + if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) && (!canClobber(SuccSU, DUSU) || (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || - (!SU->isCommutable && SuccSU->isCommutable)) && - !scheduleDAG->IsReachable(SuccSU, SU)) { + (!SU.isCommutable && SuccSU->isCommutable)) && + !scheduleDAG->IsReachable(SuccSU, &SU)) { DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" - << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); - scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Artificial)); + << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); + scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial)); } } } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 2a6c853a1d110..3be622f8c179a 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -321,7 +321,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // Add all nodes in depth first order. 
SmallVector<SDNode*, 64> Worklist; - SmallPtrSet<SDNode*, 64> Visited; + SmallPtrSet<SDNode*, 32> Visited; Worklist.push_back(DAG->getRoot().getNode()); Visited.insert(DAG->getRoot().getNode()); @@ -750,7 +750,7 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, return; } - Orders.push_back(std::make_pair(Order, std::prev(Emitter.getInsertPos()))); + Orders.push_back(std::make_pair(Order, &*std::prev(Emitter.getInsertPos()))); ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 893871f944857..8235522b14bd1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -33,7 +34,6 @@ #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" @@ -46,7 +46,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cmath> @@ -94,8 +93,22 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, // ISD Namespace //===----------------------------------------------------------------------===// -/// isBuildVectorAllOnes - Return true if the specified node is a -/// BUILD_VECTOR where all of the elements are ~0 or undef. +bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) { + auto *BV = dyn_cast<BuildVectorSDNode>(N); + if (!BV) + return false; + + APInt SplatUndef; + unsigned SplatBitSize; + bool HasUndefs; + EVT EltVT = N->getValueType(0).getVectorElementType(); + return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs) && + EltVT.getSizeInBits() >= SplatBitSize; +} + +// FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be +// specializations of the more general isConstantSplatVector()? + bool ISD::isBuildVectorAllOnes(const SDNode *N) { // Look through a bit convert. while (N->getOpcode() == ISD::BITCAST) @@ -106,7 +119,7 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { unsigned i = 0, e = N->getNumOperands(); // Skip over all of the undef values. - while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF) + while (i != e && N->getOperand(i).isUndef()) ++i; // Do not accept an all-undef vector. @@ -135,15 +148,11 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { // undefs. Even with the above element type twiddling, this should be OK, as // the same type legalization should have applied to all the elements. for (++i; i != e; ++i) - if (N->getOperand(i) != NotZero && - N->getOperand(i).getOpcode() != ISD::UNDEF) + if (N->getOperand(i) != NotZero && !N->getOperand(i).isUndef()) return false; return true; } - -/// isBuildVectorAllZeros - Return true if the specified node is a -/// BUILD_VECTOR where all of the elements are 0 or undef. bool ISD::isBuildVectorAllZeros(const SDNode *N) { // Look through a bit convert. 
while (N->getOpcode() == ISD::BITCAST) @@ -153,7 +162,7 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { bool IsAllUndef = true; for (const SDValue &Op : N->op_values()) { - if (Op.getOpcode() == ISD::UNDEF) + if (Op.isUndef()) continue; IsAllUndef = false; // Do not accept build_vectors that aren't all constants or which have non-0 @@ -181,14 +190,12 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { return true; } -/// \brief Return true if the specified node is a BUILD_VECTOR node of -/// all ConstantSDNode or undef. bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) { if (N->getOpcode() != ISD::BUILD_VECTOR) return false; for (const SDValue &Op : N->op_values()) { - if (Op.getOpcode() == ISD::UNDEF) + if (Op.isUndef()) continue; if (!isa<ConstantSDNode>(Op)) return false; @@ -196,14 +203,12 @@ bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) { return true; } -/// \brief Return true if the specified node is a BUILD_VECTOR node of -/// all ConstantFPSDNode or undef. bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) { if (N->getOpcode() != ISD::BUILD_VECTOR) return false; for (const SDValue &Op : N->op_values()) { - if (Op.getOpcode() == ISD::UNDEF) + if (Op.isUndef()) continue; if (!isa<ConstantFPSDNode>(Op)) return false; @@ -211,8 +216,6 @@ bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) { return true; } -/// allOperandsUndef - Return true if the node has at least one operand -/// and all operands of the specified node are ISD::UNDEF. bool ISD::allOperandsUndef(const SDNode *N) { // Return false if the node has no operands. // This is "logically inconsistent" with the definition of "all" but @@ -221,7 +224,7 @@ bool ISD::allOperandsUndef(const SDNode *N) { return false; for (const SDValue &Op : N->op_values()) - if (Op.getOpcode() != ISD::UNDEF) + if (!Op.isUndef()) return false; return true; @@ -242,8 +245,6 @@ ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) { llvm_unreachable("Invalid LoadExtType"); } -/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X) -/// when given the operation for (X op Y). ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { // To perform this operation, we just need to swap the L and G bits of the // operation. @@ -254,8 +255,6 @@ ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { (OldG << 2)); // New L bit. } -/// getSetCCInverse - Return the operation corresponding to !(X op Y), where -/// 'op' is a valid SetCC operation. ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { unsigned Operation = Op; if (isInteger) @@ -270,9 +269,9 @@ ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { } -/// isSignedOp - For an integer comparison, return 1 if the comparison is a -/// signed operation and 2 if the result is an unsigned comparison. Return zero -/// if the operation does not depend on the sign of the input (setne and seteq). +/// For an integer comparison, return 1 if the comparison is a signed operation +/// and 2 if the result is an unsigned comparison. Return zero if the operation +/// does not depend on the sign of the input (setne and seteq). static int isSignedOp(ISD::CondCode Opcode) { switch (Opcode) { default: llvm_unreachable("Illegal integer setcc operation!"); @@ -289,10 +288,6 @@ static int isSignedOp(ISD::CondCode Opcode) { } } -/// getSetCCOrOperation - Return the result of a logical OR between different -/// comparisons of identical values: ((X op1 Y) | (X op2 Y)). 
This function -/// returns SETCC_INVALID if it is not possible to represent the resultant -/// comparison. ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2, bool isInteger) { if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) @@ -313,10 +308,6 @@ ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2, return ISD::CondCode(Op); } -/// getSetCCAndOperation - Return the result of a logical AND between different -/// comparisons of identical values: ((X op1 Y) & (X op2 Y)). This -/// function returns zero if it is not possible to represent the resultant -/// comparison. ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, bool isInteger) { if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) @@ -644,7 +635,8 @@ void SelectionDAG::DeleteNode(SDNode *N) { } void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) { - assert(N != AllNodes.begin() && "Cannot delete the entry node!"); + assert(N->getIterator() != AllNodes.begin() && + "Cannot delete the entry node!"); assert(N->use_empty() && "Cannot delete a node that is not dead!"); // Drop all of the operands and decrement used node's use counts. @@ -663,8 +655,8 @@ void SDDbgInfo::erase(const SDNode *Node) { } void SelectionDAG::DeallocateNode(SDNode *N) { - if (N->OperandsNeedDelete) - delete[] N->OperandList; + // If we have operands, deallocate them. + removeOperands(N); // Set the opcode to DELETED_NODE to help catch bugs when node // memory is reallocated. @@ -832,7 +824,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); - SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); + SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); if (Node) if (const SDNodeFlags *Flags = N->getFlags()) Node->intersectFlagsWith(Flags); @@ -853,7 +845,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); - SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); + SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); if (Node) if (const SDNodeFlags *Flags = N->getFlags()) Node->intersectFlagsWith(Flags); @@ -873,16 +865,13 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); - SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); + SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); if (Node) if (const SDNodeFlags *Flags = N->getFlags()) Node->intersectFlagsWith(Flags); return Node; } -/// getEVTAlignment - Compute the default alignment value for the -/// given type. -/// unsigned SelectionDAG::getEVTAlignment(EVT VT) const { Type *Ty = VT == MVT::iPTR ? 
PointerType::get(Type::getInt8Ty(*getContext()), 0) : @@ -911,6 +900,7 @@ void SelectionDAG::init(MachineFunction &mf) { SelectionDAG::~SelectionDAG() { assert(!UpdateListeners && "Dangling registered DAGUpdateListeners"); allnodes_clear(); + OperandRecycler.clear(OperandAllocator); delete DbgInfo; } @@ -924,24 +914,26 @@ void SelectionDAG::allnodes_clear() { #endif } -BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL, - SDVTList VTs, SDValue N1, - SDValue N2, - const SDNodeFlags *Flags) { +SDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, const SDLoc &DL, + SDVTList VTs, SDValue N1, SDValue N2, + const SDNodeFlags *Flags) { + SDValue Ops[] = {N1, N2}; + if (isBinOpWithFlags(Opcode)) { // If no flags were passed in, use a default flags object. SDNodeFlags F; if (Flags == nullptr) Flags = &F; - BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode( - Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2, *Flags); + auto *FN = newSDNode<BinaryWithFlagsSDNode>(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, *Flags); + createOperands(FN, Ops); return FN; } - BinarySDNode *N = new (NodeAllocator) - BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2); + auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); return N; } @@ -961,19 +953,25 @@ SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, } SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, - DebugLoc DL, void *&InsertPos) { + const SDLoc &DL, void *&InsertPos) { SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); if (N) { switch (N->getOpcode()) { - default: break; // Process only regular (non-target) constant nodes. case ISD::Constant: case ISD::ConstantFP: // Erase debug location from the node if the node is used at several - // different places to do not propagate one location to all uses as it - // leads to incorrect debug info. - if (N->getDebugLoc() != DL) + // different places. Do not propagate one location to all uses as it + // will cause a worse single stepping debugging experience. + if (N->getDebugLoc() != DL.getDebugLoc()) N->setDebugLoc(DebugLoc()); break; + default: + // When the node's point of use is located earlier in the instruction + // sequence than its prior point of use, update its debug info to the + // earlier location. + if (DL.getIROrder() && DL.getIROrder() < N->getIROrder()) + N->setDebugLoc(DL.getDebugLoc()); + break; } } return N; @@ -981,6 +979,7 @@ SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, void SelectionDAG::clear() { allnodes_clear(); + OperandRecycler.clear(OperandAllocator); OperandAllocator.Reset(); CSEMap.clear(); @@ -999,25 +998,25 @@ void SelectionDAG::clear() { DbgInfo->clear(); } -SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { +SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::ANY_EXTEND, DL, VT, Op) : getNode(ISD::TRUNCATE, DL, VT, Op); } -SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { +SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::SIGN_EXTEND, DL, VT, Op) : getNode(ISD::TRUNCATE, DL, VT, Op); } -SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { +SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? 
getNode(ISD::ZERO_EXTEND, DL, VT, Op) : getNode(ISD::TRUNCATE, DL, VT, Op); } -SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT, +SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT) { if (VT.bitsLE(Op.getValueType())) return getNode(ISD::TRUNCATE, SL, VT, Op); @@ -1026,7 +1025,7 @@ SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT, return getNode(TLI->getExtendForContent(BType), SL, VT, Op); } -SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { +SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { assert(!VT.isVector() && "getZeroExtendInReg should use the vector element type instead of " "the vector type!"); @@ -1038,7 +1037,8 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { getConstant(Imm, DL, Op.getValueType())); } -SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { +SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL, + EVT VT) { assert(VT.isVector() && "This DAG node is restricted to vector types."); assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && "The sizes of the input and result must match in order to perform the " @@ -1048,7 +1048,8 @@ SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { return getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Op); } -SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { +SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, const SDLoc &DL, + EVT VT) { assert(VT.isVector() && "This DAG node is restricted to vector types."); assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && "The sizes of the input and result must match in order to perform the " @@ -1058,7 +1059,8 @@ SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { return getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Op); } -SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { +SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL, + EVT VT) { assert(VT.isVector() && "This DAG node is restricted to vector types."); assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && "The sizes of the input and result must match in order to perform the " @@ -1070,14 +1072,14 @@ SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). 
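// Aside: two of the helpers in this stretch reduce to one-line bit tricks.
// getZeroExtendInReg (above) is an AND with the narrow type's low-bit mask,
// and getNOT (below) is an XOR with all-ones. The same operations on plain
// 64-bit integers, with the bit widths as explicit parameters:
#include <cassert>
#include <cstdint>

static uint64_t lowBits(unsigned Bits) {          // cf. APInt low-bits mask
  return Bits >= 64 ? ~0ULL : ((1ULL << Bits) - 1);
}

uint64_t zextInReg(uint64_t V, unsigned FromBits) {
  return V & lowBits(FromBits);                   // emitted as (AND Val, Imm)
}

uint64_t bitwiseNot(uint64_t V, unsigned Bits) {
  return (V ^ lowBits(Bits)) & lowBits(Bits);     // emitted as (XOR Val, -1)
}

int main() {
  assert(zextInReg(0x1FF, 8) == 0xFF);
  assert(bitwiseNot(0xA, 4) == 0x5);
}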
/// -SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { +SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) { EVT EltVT = VT.getScalarType(); SDValue NegOne = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT); return getNode(ISD::XOR, DL, VT, Val, NegOne); } -SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) { +SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) { EVT EltVT = VT.getScalarType(); SDValue TrueValue; switch (TLI->getBooleanContents(VT)) { @@ -1093,8 +1095,8 @@ SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) { return getNode(ISD::XOR, DL, VT, Val, TrueValue); } -SDValue SelectionDAG::getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isT, - bool isO) { +SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT, + bool isT, bool isO) { EVT EltVT = VT.getScalarType(); assert((EltVT.getSizeInBits() >= 64 || (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && @@ -1102,14 +1104,13 @@ SDValue SelectionDAG::getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isT, return getConstant(APInt(EltVT.getSizeInBits(), Val), DL, VT, isT, isO); } -SDValue SelectionDAG::getConstant(const APInt &Val, SDLoc DL, EVT VT, bool isT, - bool isO) -{ +SDValue SelectionDAG::getConstant(const APInt &Val, const SDLoc &DL, EVT VT, + bool isT, bool isO) { return getConstant(*ConstantInt::get(*Context, Val), DL, VT, isT, isO); } -SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT, - bool isT, bool isO) { +SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, + EVT VT, bool isT, bool isO) { assert(VT.isInteger() && "Cannot create FP integer constant!"); EVT EltVT = VT.getScalarType(); @@ -1134,7 +1135,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT, else if (NewNodesMustHaveLegalTypes && VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) == TargetLowering::TypeExpandInteger) { - APInt NewVal = Elt->getValue(); + const APInt &NewVal = Elt->getValue(); EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits(); unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits; @@ -1168,9 +1169,8 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT, for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) Ops.insert(Ops.end(), EltParts.begin(), EltParts.end()); - SDValue Result = getNode(ISD::BITCAST, SDLoc(), VT, - getNode(ISD::BUILD_VECTOR, SDLoc(), ViaVecVT, - Ops)); + SDValue Result = getNode(ISD::BITCAST, DL, VT, + getNode(ISD::BUILD_VECTOR, DL, ViaVecVT, Ops)); return Result; } @@ -1183,37 +1183,34 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT, ID.AddBoolean(isO); void *IP = nullptr; SDNode *N = nullptr; - if ((N = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))) + if ((N = FindNodeOrInsertPos(ID, DL, IP))) if (!VT.isVector()) return SDValue(N, 0); if (!N) { - N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, DL.getDebugLoc(), - EltVT); + N = newSDNode<ConstantSDNode>(isT, isO, Elt, DL.getDebugLoc(), EltVT); CSEMap.InsertNode(N, IP); InsertNode(N); } SDValue Result(N, 0); - if (VT.isVector()) { - SmallVector<SDValue, 8> Ops; - Ops.assign(VT.getVectorNumElements(), Result); - Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops); - } + if (VT.isVector()) + Result = getSplatBuildVector(VT, DL, Result); return Result; } -SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, SDLoc DL, bool 
isTarget) { +SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL, + bool isTarget) { return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget); } -SDValue SelectionDAG::getConstantFP(const APFloat& V, SDLoc DL, EVT VT, +SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT, bool isTarget) { return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget); } -SDValue SelectionDAG::getConstantFP(const ConstantFP& V, SDLoc DL, EVT VT, - bool isTarget){ +SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL, + EVT VT, bool isTarget) { assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); EVT EltVT = VT.getScalarType(); @@ -1227,47 +1224,42 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, SDLoc DL, EVT VT, ID.AddPointer(&V); void *IP = nullptr; SDNode *N = nullptr; - if ((N = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))) + if ((N = FindNodeOrInsertPos(ID, DL, IP))) if (!VT.isVector()) return SDValue(N, 0); if (!N) { - N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, DL.getDebugLoc(), - EltVT); + N = newSDNode<ConstantFPSDNode>(isTarget, &V, DL.getDebugLoc(), EltVT); CSEMap.InsertNode(N, IP); InsertNode(N); } SDValue Result(N, 0); - if (VT.isVector()) { - SmallVector<SDValue, 8> Ops; - Ops.assign(VT.getVectorNumElements(), Result); - Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops); - } + if (VT.isVector()) + Result = getSplatBuildVector(VT, DL, Result); return Result; } -SDValue SelectionDAG::getConstantFP(double Val, SDLoc DL, EVT VT, +SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget) { EVT EltVT = VT.getScalarType(); - if (EltVT==MVT::f32) + if (EltVT == MVT::f32) return getConstantFP(APFloat((float)Val), DL, VT, isTarget); - else if (EltVT==MVT::f64) + else if (EltVT == MVT::f64) return getConstantFP(APFloat(Val), DL, VT, isTarget); - else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::ppcf128 || - EltVT==MVT::f16) { - bool ignored; - APFloat apf = APFloat(Val); - apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, - &ignored); - return getConstantFP(apf, DL, VT, isTarget); + else if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 || + EltVT == MVT::f16) { + bool Ignored; + APFloat APF = APFloat(Val); + APF.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, + &Ignored); + return getConstantFP(APF, DL, VT, isTarget); } else llvm_unreachable("Unsupported type in getConstantFP"); } -SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, - EVT VT, int64_t Offset, - bool isTargetGA, +SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, + EVT VT, int64_t Offset, bool isTargetGA, unsigned char TargetFlags) { assert((TargetFlags == 0 || isTargetGA) && "Cannot set target flags on target-independent globals"); @@ -1290,12 +1282,11 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, ID.AddInteger(TargetFlags); ID.AddInteger(GV->getType()->getAddressSpace()); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL.getIROrder(), - DL.getDebugLoc(), GV, VT, - Offset, TargetFlags); + auto *N = newSDNode<GlobalAddressSDNode>( + Opc, DL.getIROrder(), DL.getDebugLoc(), GV, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); 
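// Aside: every getter converted to newSDNode<> in this patch keeps the same
// CSE discipline: hash the prospective node, return the existing copy on a
// hit, otherwise allocate and register it. Sketched here with an ordinary
// hash map standing in for the FoldingSet (a real folding set hashes every
// field; the ID computation below is only illustrative):
#include <cstdio>
#include <memory>
#include <unordered_map>
#include <vector>

struct Node { int Opcode; long long Payload; };

class MiniDAG {
  std::unordered_map<long long, Node *> CSEMap;
  std::vector<std::unique_ptr<Node>> Nodes;   // allocator stand-in

public:
  Node *getNode(int Opcode, long long Payload) {
    long long ID = ((long long)Opcode << 48) ^ Payload;
    auto It = CSEMap.find(ID);
    if (It != CSEMap.end())
      return It->second;                      // CSE hit: reuse the node
    Nodes.push_back(std::make_unique<Node>(Node{Opcode, Payload}));
    return CSEMap[ID] = Nodes.back().get();   // miss: create and register
  }
};

int main() {
  MiniDAG G;
  std::printf("%d\n", G.getNode(1, 42) == G.getNode(1, 42)); // prints 1
}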
@@ -1310,7 +1301,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget); + auto *N = newSDNode<FrameIndexSDNode>(FI, VT, isTarget); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1329,8 +1320,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget, - TargetFlags); + auto *N = newSDNode<JumpTableSDNode>(JTI, VT, isTarget, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1355,8 +1345,8 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, - Alignment, TargetFlags); + auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment, + TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1382,8 +1372,8 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, - Alignment, TargetFlags); + auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment, + TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1400,8 +1390,7 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = - new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, TargetFlags); + auto *N = newSDNode<TargetIndexSDNode>(Index, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1415,7 +1404,7 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB); + auto *N = newSDNode<BasicBlockSDNode>(MBB); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1430,7 +1419,7 @@ SDValue SelectionDAG::getValueType(EVT VT) { ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy]; if (N) return SDValue(N, 0); - N = new (NodeAllocator) VTSDNode(VT); + N = newSDNode<VTSDNode>(VT); InsertNode(N); return SDValue(N, 0); } @@ -1438,7 +1427,7 @@ SDValue SelectionDAG::getValueType(EVT VT) { SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { SDNode *&N = ExternalSymbols[Sym]; if (N) return SDValue(N, 0); - N = new (NodeAllocator) ExternalSymbolSDNode(false, Sym, 0, VT); + N = newSDNode<ExternalSymbolSDNode>(false, Sym, 0, VT); InsertNode(N); return SDValue(N, 0); } @@ -1447,7 +1436,7 @@ SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) { SDNode *&N = MCSymbols[Sym]; if (N) return SDValue(N, 0); - N = new (NodeAllocator) MCSymbolSDNode(Sym, VT); + N = newSDNode<MCSymbolSDNode>(Sym, VT); InsertNode(N); return SDValue(N, 0); } @@ -1458,7 +1447,7 @@ SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym, TargetFlags)]; if (N) return SDValue(N, 0); - N = new (NodeAllocator) ExternalSymbolSDNode(true, Sym, TargetFlags, VT); + N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, VT); InsertNode(N); return 
SDValue(N, 0); } @@ -1468,7 +1457,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { CondCodeNodes.resize(Cond+1); if (!CondCodeNodes[Cond]) { - CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond); + auto *N = newSDNode<CondCodeSDNode>(Cond); CondCodeNodes[Cond] = N; InsertNode(N); } @@ -1476,41 +1465,42 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { return SDValue(CondCodeNodes[Cond], 0); } -// commuteShuffle - swaps the values of N1 and N2, and swaps all indices in -// the shuffle mask M that point at N1 to point at N2, and indices that point -// N2 to point at N1. -static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) { +/// Swaps the values of N1 and N2. Swaps all indices in the shuffle mask M that +/// point at N1 to point at N2 and indices that point at N2 to point at N1. +static void commuteShuffle(SDValue &N1, SDValue &N2, MutableArrayRef<int> M) { std::swap(N1, N2); ShuffleVectorSDNode::commuteMask(M); } -SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, - SDValue N2, const int *Mask) { +SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, + SDValue N2, ArrayRef<int> Mask) { + assert(VT.getVectorNumElements() == Mask.size() && + "Must have the same number of vector elements as mask elements!"); assert(VT == N1.getValueType() && VT == N2.getValueType() && "Invalid VECTOR_SHUFFLE"); // Canonicalize shuffle undef, undef -> undef - if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF) + if (N1.isUndef() && N2.isUndef()) return getUNDEF(VT); // Validate that all indices in Mask are within the range of the elements // input to the shuffle. - unsigned NElts = VT.getVectorNumElements(); - SmallVector<int, 8> MaskVec; - for (unsigned i = 0; i != NElts; ++i) { - assert(Mask[i] < (int)(NElts * 2) && "Index out of range"); - MaskVec.push_back(Mask[i]); - } + int NElts = Mask.size(); + assert(all_of(Mask, [&](int M) { return M < (NElts * 2); }) && + "Index out of range"); + + // Copy the mask so we can do any needed cleanup. + SmallVector<int, 8> MaskVec(Mask.begin(), Mask.end()); // Canonicalize shuffle v, v -> v, undef if (N1 == N2) { N2 = getUNDEF(VT); - for (unsigned i = 0; i != NElts; ++i) - if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts; + for (int i = 0; i != NElts; ++i) + if (MaskVec[i] >= NElts) MaskVec[i] -= NElts; } // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) commuteShuffle(N1, N2, MaskVec); // If shuffling a splat, try to blend the splat instead. We do this here so @@ -1521,8 +1511,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, if (!Splat) return; - for (int i = 0; i < (int)NElts; ++i) { - if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + (int)NElts)) + for (int i = 0; i < NElts; ++i) { + if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts)) continue; // If this input comes from undef, mark it as such. 
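// Aside: the shuffle(v, v, M) -> shuffle(v, undef, M') canonicalization above
// is pure mask arithmetic: any lane indexing the (identical) second input is
// redirected into the first. Standalone, with -1 marking an undef lane:
#include <cassert>
#include <vector>

void foldSameInput(std::vector<int> &Mask) {
  int NElts = (int)Mask.size();
  for (int &M : Mask)
    if (M >= NElts)   // lane taken from input #2
      M -= NElts;     // the same data lives at M - NElts in input #1
}

int main() {
  std::vector<int> Mask = {0, 5, 2, 7};  // 4-wide shuffle; 5 and 7 hit input #2
  foldSameInput(Mask);
  assert((Mask == std::vector<int>{0, 1, 2, 3}));
}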
@@ -1544,9 +1534,9 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, // Canonicalize all index into lhs, -> shuffle lhs, undef // Canonicalize all index into rhs, -> shuffle rhs, undef bool AllLHS = true, AllRHS = true; - bool N2Undef = N2.getOpcode() == ISD::UNDEF; - for (unsigned i = 0; i != NElts; ++i) { - if (MaskVec[i] >= (int)NElts) { + bool N2Undef = N2.isUndef(); + for (int i = 0; i != NElts; ++i) { + if (MaskVec[i] >= NElts) { if (N2Undef) MaskVec[i] = -1; else @@ -1564,15 +1554,15 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, commuteShuffle(N1, N2, MaskVec); } // Reset our undef status after accounting for the mask. - N2Undef = N2.getOpcode() == ISD::UNDEF; + N2Undef = N2.isUndef(); // Re-check whether both sides ended up undef. - if (N1.getOpcode() == ISD::UNDEF && N2Undef) + if (N1.isUndef() && N2Undef) return getUNDEF(VT); // If Identity shuffle return that node. bool Identity = true, AllSame = true; - for (unsigned i = 0; i != NElts; ++i) { - if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; + for (int i = 0; i != NElts; ++i) { + if (MaskVec[i] >= 0 && MaskVec[i] != i) Identity = false; if (MaskVec[i] != MaskVec[0]) AllSame = false; } if (Identity && NElts) @@ -1592,7 +1582,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, BitVector UndefElements; SDValue Splat = BV->getSplatValue(&UndefElements); // If this is a splat of an undef, shuffling it is also undef. - if (Splat && Splat.getOpcode() == ISD::UNDEF) + if (Splat && Splat.isUndef()) return getUNDEF(VT); bool SameNumElts = @@ -1612,11 +1602,9 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, // If the shuffle itself creates a splat, build the vector directly. if (AllSame && SameNumElts) { - const SDValue &Splatted = BV->getOperand(MaskVec[0]); - SmallVector<SDValue, 8> Ops(NElts, Splatted); - EVT BuildVT = BV->getValueType(0); - SDValue NewBV = getNode(ISD::BUILD_VECTOR, dl, BuildVT, Ops); + const SDValue &Splatted = BV->getOperand(MaskVec[0]); + SDValue NewBV = getSplatBuildVector(BuildVT, dl, Splatted); // We may have jumped through bitcasts, so the type of the // BUILD_VECTOR may not match the type of the shuffle. @@ -1630,23 +1618,23 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, FoldingSetNodeID ID; SDValue Ops[2] = { N1, N2 }; AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops); - for (unsigned i = 0; i != NElts; ++i) + for (int i = 0; i != NElts; ++i) ID.AddInteger(MaskVec[i]); void* IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) return SDValue(E, 0); // Allocate the mask array for the node out of the BumpPtrAllocator, since // SDNode doesn't have access to it. This memory will be "leaked" when // the node is deallocated, but recovered when the NodeAllocator is released. 
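// Aside: before allocating a node, getVectorShuffle also recognizes identity
// masks (every defined lane i selects element i) and splat masks (all lanes
// equal). The identity test in isolation, with -1 lanes acting as wildcards:
#include <cassert>
#include <vector>

bool isIdentityMask(const std::vector<int> &Mask) {
  for (int i = 0, e = (int)Mask.size(); i != e; ++i)
    if (Mask[i] >= 0 && Mask[i] != i)
      return false;   // a defined lane that moves data breaks identity
  return true;
}

int main() {
  assert(isIdentityMask({0, -1, 2, 3}));
  assert(!isIdentityMask({0, 2, 1, 3}));
}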
int *MaskAlloc = OperandAllocator.Allocate<int>(NElts); - memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int)); + std::copy(MaskVec.begin(), MaskVec.end(), MaskAlloc); + + auto *N = newSDNode<ShuffleVectorSDNode>(VT, dl.getIROrder(), + dl.getDebugLoc(), MaskAlloc); + createOperands(N, Ops); - ShuffleVectorSDNode *N = - new (NodeAllocator) ShuffleVectorSDNode(VT, dl.getIROrder(), - dl.getDebugLoc(), N1, N2, - MaskAlloc); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1659,13 +1647,12 @@ SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { SDValue Op0 = SV.getOperand(0); SDValue Op1 = SV.getOperand(1); - return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, &MaskVec[0]); + return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, MaskVec); } -SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, - SDValue Val, SDValue DTy, - SDValue STy, SDValue Rnd, SDValue Sat, - ISD::CvtCode Code) { +SDValue SelectionDAG::getConvertRndSat(EVT VT, const SDLoc &dl, SDValue Val, + SDValue DTy, SDValue STy, SDValue Rnd, + SDValue Sat, ISD::CvtCode Code) { // If the src and dest types are the same and the conversion is between // integer types of the same sign or two floats, no conversion is necessary. if (DTy == STy && @@ -1676,12 +1663,13 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), Ops); void* IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) return SDValue(E, 0); - CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(), - dl.getDebugLoc(), - Ops, Code); + auto *N = + newSDNode<CvtRndSatSDNode>(VT, dl.getIROrder(), dl.getDebugLoc(), Code); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1695,7 +1683,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT); + auto *N = newSDNode<RegisterSDNode>(RegNo, VT); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1709,13 +1697,14 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask); + auto *N = newSDNode<RegisterMaskSDNode>(RegMask); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) { +SDValue SelectionDAG::getEHLabel(const SDLoc &dl, SDValue Root, + MCSymbol *Label) { FoldingSetNodeID ID; SDValue Ops[] = { Root }; AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops); @@ -1724,14 +1713,14 @@ SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) { if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(), - dl.getDebugLoc(), Root, Label); + auto *N = newSDNode<EHLabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } - SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset, bool isTarget, @@ -1747,8 +1736,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new 
(NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset, - TargetFlags); + auto *N = newSDNode<BlockAddressSDNode>(Opc, VT, BA, Offset, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1766,13 +1754,12 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) SrcValueSDNode(V); + auto *N = newSDNode<SrcValueSDNode>(V); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -/// getMDNode - Return an MDNodeSDNode which holds an MDNode. SDValue SelectionDAG::getMDNode(const MDNode *MD) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), None); @@ -1782,7 +1769,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) MDNodeSDNode(MD); + auto *N = newSDNode<MDNodeSDNode>(MD); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1795,8 +1782,7 @@ SDValue SelectionDAG::getBitcast(EVT VT, SDValue V) { return getNode(ISD::BITCAST, SDLoc(V), VT, V); } -/// getAddrSpaceCast - Return an AddrSpaceCastSDNode. -SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, +SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS) { SDValue Ops[] = {Ptr}; FoldingSetNodeID ID; @@ -1805,12 +1791,13 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, ID.AddInteger(DestAS); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) AddrSpaceCastSDNode(dl.getIROrder(), - dl.getDebugLoc(), - VT, Ptr, SrcAS, DestAS); + auto *N = newSDNode<AddrSpaceCastSDNode>(dl.getIROrder(), dl.getDebugLoc(), + VT, SrcAS, DestAS); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1835,9 +1822,8 @@ SDValue SelectionDAG::expandVAArg(SDNode *Node) { SDValue Tmp2 = Node->getOperand(1); unsigned Align = Node->getConstantOperandVal(3); - SDValue VAListLoad = - getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, Tmp2, - MachinePointerInfo(V), false, false, false, 0); + SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, + Tmp2, MachinePointerInfo(V)); SDValue VAList = VAListLoad; if (Align > TLI.getMinStackArgumentAlignment()) { @@ -1856,11 +1842,10 @@ SDValue SelectionDAG::expandVAArg(SDNode *Node) { VT.getTypeForEVT(*getContext())), dl, VAList.getValueType())); // Store the incremented VAList to the legalized pointer - Tmp1 = getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, - MachinePointerInfo(V), false, false, 0); + Tmp1 = + getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, MachinePointerInfo(V)); // Load the actual argument out of the pointer VAList - return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo(), - false, false, false, 0); + return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo()); } SDValue SelectionDAG::expandVACopy(SDNode *Node) { @@ -1870,15 +1855,13 @@ SDValue SelectionDAG::expandVACopy(SDNode *Node) { // output, returning the chain. 
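// Aside: expandVAArg above lowers VAARG into load-pointer, realign (only when
// the argument is more aligned than the minimum stack argument alignment),
// store the bumped pointer back, then load the argument itself. The pointer
// bookkeeping in plain arithmetic (ArgAlign assumed to be a power of two; the
// final argument load is elided):
#include <cassert>
#include <cstdint>

uint64_t stepVAList(uint64_t &VAList, unsigned ArgSize, unsigned ArgAlign) {
  uint64_t P = (VAList + ArgAlign - 1) & ~(uint64_t)(ArgAlign - 1);
  VAList = P + ArgSize;   // the "store the incremented VAList" step
  return P;               // address the argument would be loaded from
}

int main() {
  uint64_t VAList = 20;
  assert(stepVAList(VAList, 8, 8) == 24 && VAList == 32);
}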
const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); - SDValue Tmp1 = getLoad(TLI.getPointerTy(getDataLayout()), dl, - Node->getOperand(0), Node->getOperand(2), - MachinePointerInfo(VS), false, false, false, 0); + SDValue Tmp1 = + getLoad(TLI.getPointerTy(getDataLayout()), dl, Node->getOperand(0), + Node->getOperand(2), MachinePointerInfo(VS)); return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), - MachinePointerInfo(VD), false, false, 0); + MachinePointerInfo(VD)); } -/// CreateStackTemporary - Create a stack temporary, suitable for holding the -/// specified value type. SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); unsigned ByteSize = VT.getStoreSize(); @@ -1890,8 +1873,6 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); } -/// CreateStackTemporary - Create a stack temporary suitable for holding -/// either of the specified value types. SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize()); Type *Ty1 = VT1.getTypeForEVT(*getContext()); @@ -1905,8 +1886,8 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); } -SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, - SDValue N2, ISD::CondCode Cond, SDLoc dl) { +SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, + ISD::CondCode Cond, const SDLoc &dl) { // These setcc operations always fold. switch (Cond) { default: break; @@ -2469,6 +2450,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownOne = KnownOne.trunc(BitWidth); break; } + case ISD::BSWAP: { + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + KnownZero = KnownZero2.byteSwap(); + KnownOne = KnownOne2.byteSwap(); + break; + } case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -2506,12 +2493,36 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } -/// ComputeNumSignBits - Return the number of times the sign bit of the -/// register is replicated into the other bits. We know that at least 1 bit -/// is always equal to the sign bit (itself), but other cases can give us -/// information. For example, immediately after an "SRA X, 2", we know that -/// the top 3 bits are all equal to each other, so we return 3. -unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ +bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { + // A left-shift of a constant one will have exactly one bit set because + // shifting the bit off the end is undefined. + if (Val.getOpcode() == ISD::SHL) { + auto *C = dyn_cast<ConstantSDNode>(Val.getOperand(0)); + if (C && C->getAPIntValue() == 1) + return true; + } + + // Similarly, a logical right-shift of a constant sign-bit will have exactly + // one bit set. + if (Val.getOpcode() == ISD::SRL) { + auto *C = dyn_cast<ConstantSDNode>(Val.getOperand(0)); + if (C && C->getAPIntValue().isSignBit()) + return true; + } + + // More could be done here, though the above checks are enough + // to handle some common cases. + + // Fall back to computeKnownBits to catch other known cases. 
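// Aside: the known-bits fallback spelled out just below says a value is a
// power of two exactly when all but one bit is known zero and the remaining
// bit is known one. The same predicate over 64-bit masks (std::popcount,
// C++20, mirrors APInt::countPopulation):
#include <bit>
#include <cassert>
#include <cstdint>

bool powerOfTwoFromKnownBits(uint64_t KnownZero, uint64_t KnownOne,
                             unsigned BitWidth) {
  return std::popcount(KnownZero) == (int)BitWidth - 1 &&
         std::popcount(KnownOne) == 1;
}

int main() {
  // A 64-bit value known to be exactly 0b0100.
  assert(powerOfTwoFromKnownBits(~0x4ULL, 0x4ULL, 64));
}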
+ EVT OpVT = Val.getValueType(); + unsigned BitWidth = OpVT.getScalarType().getSizeInBits(); + APInt KnownZero, KnownOne; + computeKnownBits(Val, KnownZero, KnownOne); + return (KnownZero.countPopulation() == BitWidth - 1) && + (KnownOne.countPopulation() == 1); +} + +unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); unsigned VTBits = VT.getScalarType().getSizeInBits(); @@ -2761,11 +2772,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros())); } -/// isBaseWithConstantOffset - Return true if the specified operand is an -/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an -/// ISD::OR with a ConstantSDNode that is guaranteed to have the same -/// semantics as an ADD. This handles the equivalence: -/// X|Cst == X+Cst iff X&Cst = 0. bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) || !isa<ConstantSDNode>(Op.getOperand(1))) @@ -2779,7 +2785,6 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { return true; } - bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { // If we're told that NaNs won't happen, assume they won't. if (getTarget().Options.NoNaNsFPMath) @@ -2834,28 +2839,30 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { return (AZero | BZero).isAllOnesValue(); } -static SDValue FoldCONCAT_VECTORS(SDLoc DL, EVT VT, ArrayRef<SDValue> Ops, +static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT, + ArrayRef<SDValue> Ops, llvm::SelectionDAG &DAG) { if (Ops.size() == 1) return Ops[0]; // Concat of UNDEFs is UNDEF. - if (std::all_of(Ops.begin(), Ops.end(), - [](SDValue Op) { return Op.isUndef(); })) + if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); })) return DAG.getUNDEF(VT); - // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified - // to one big BUILD_VECTOR. - // FIXME: Add support for UNDEF and SCALAR_TO_VECTOR as well. - if (!std::all_of(Ops.begin(), Ops.end(), [](SDValue Op) { - return Op.getOpcode() == ISD::BUILD_VECTOR; - })) - return SDValue(); - + // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be + // simplified to one big BUILD_VECTOR. + // FIXME: Add support for SCALAR_TO_VECTOR as well. EVT SVT = VT.getScalarType(); SmallVector<SDValue, 16> Elts; - for (SDValue Op : Ops) - Elts.append(Op->op_begin(), Op->op_end()); + for (SDValue Op : Ops) { + EVT OpVT = Op.getValueType(); + if (Op.isUndef()) + Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT)); + else if (Op.getOpcode() == ISD::BUILD_VECTOR) + Elts.append(Op->op_begin(), Op->op_end()); + else + return SDValue(); + } // BUILD_VECTOR requires all inputs to be of the same type, find the // maximum type and extend them all. @@ -2871,25 +2878,24 @@ static SDValue FoldCONCAT_VECTORS(SDLoc DL, EVT VT, ArrayRef<SDValue> Ops, return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts); } -/// getNode - Gets or creates the specified node. -/// -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { +/// Gets or creates the specified node. 
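// Aside: isBaseWithConstantOffset above leans on the identity
// X | C == X + C whenever X & C == 0, because disjoint bits can never
// generate a carry; that lets an OR be treated as an ADD when matching
// base-plus-offset addressing. A quick executable check of the identity:
#include <cassert>
#include <cstdint>

bool orActsLikeAdd(uint64_t X, uint64_t C) { return (X & C) == 0; }

int main() {
  uint64_t X = 0xF0, C = 0x0F;
  assert(orActsLikeAdd(X, C) && (X | C) == X + C);
  assert(!orActsLikeAdd(1, 1) && (1 | 1) != 1 + 1);  // overlap: 1 vs 2
}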
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, getVTList(VT), None); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), getVTList(VT)); + auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), + getVTList(VT)); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, - EVT VT, SDValue Operand) { +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, + SDValue Operand) { // Constant fold unary operations with an integer constant operand. Even // opaque constant will be folded, because the folding of unary operations // doesn't create new constants with different values. Nevertheless, the @@ -3054,7 +3060,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, "Vector element count mismatch!"); assert(Operand.getValueType().bitsLT(VT) && "Invalid fpext node, dst < src!"); - if (Operand.getOpcode() == ISD::UNDEF) + if (Operand.isUndef()) return getUNDEF(VT); break; case ISD::SIGN_EXTEND: @@ -3148,6 +3154,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); break; + case ISD::BITREVERSE: + assert(VT.isInteger() && VT == Operand.getValueType() && + "Invalid BITREVERSE!"); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + break; case ISD::BITCAST: // Basic sanity checking. assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits() @@ -3192,20 +3204,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDNode *N; SDVTList VTs = getVTList(VT); + SDValue Ops[] = {Operand}; if (VT != MVT::Glue) { // Don't CSE flag producing nodes FoldingSetNodeID ID; - SDValue Ops[1] = { Operand }; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) return SDValue(E, 0); - N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTs, Operand); + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTs, Operand); + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); } InsertNode(N); @@ -3250,8 +3262,8 @@ static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1, return std::make_pair(APInt(1, 0), false); } -SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, - const ConstantSDNode *Cst1, +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, + EVT VT, const ConstantSDNode *Cst1, const ConstantSDNode *Cst2) { if (Cst1->isOpaque() || Cst2->isOpaque()) return SDValue(); @@ -3263,8 +3275,29 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, return getConstant(Folded.first, DL, VT); } -SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, - SDNode *Cst1, SDNode *Cst2) { +SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT, + const GlobalAddressSDNode *GA, + const SDNode *N2) { + if (GA->getOpcode() != ISD::GlobalAddress) + return SDValue(); + if (!TLI->isOffsetFoldingLegal(GA)) + return 
SDValue(); + const ConstantSDNode *Cst2 = dyn_cast<ConstantSDNode>(N2); + if (!Cst2) + return SDValue(); + int64_t Offset = Cst2->getSExtValue(); + switch (Opcode) { + case ISD::ADD: break; + case ISD::SUB: Offset = -uint64_t(Offset); break; + default: return SDValue(); + } + return getGlobalAddress(GA->getGlobal(), SDLoc(Cst2), VT, + GA->getOffset() + uint64_t(Offset)); +} + +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, + EVT VT, SDNode *Cst1, + SDNode *Cst2) { // If the opcode is a target-specific ISD node, there's nothing we can // do here and the operand rules may not line up with the below, so // bail early. @@ -3274,21 +3307,20 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, // Handle the case of two scalars. if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) { if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) { - if (SDValue Folded = - FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2)) { - if (!VT.isVector()) - return Folded; - SmallVector<SDValue, 4> Outputs; - // We may have a vector type but a scalar result. Create a splat. - Outputs.resize(VT.getVectorNumElements(), Outputs.back()); - // Build a big vector out of the scalar elements we generated. - return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs); - } else { - return SDValue(); - } + SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2); + assert((!Folded || !VT.isVector()) && + "Can't fold vectors ops with scalar operands"); + return Folded; } } + // fold (add Sym, c) -> Sym+c + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst1)) + return FoldSymbolOffset(Opcode, VT, GA, Cst2); + if (isCommutativeBinOp(Opcode)) + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst2)) + return FoldSymbolOffset(Opcode, VT, GA, Cst1); + // For vectors extract each constant element into Inputs so we can constant // fold them individually. BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); @@ -3329,11 +3361,11 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, Outputs.resize(VT.getVectorNumElements(), Outputs.back()); // Build a big vector out of the scalar elements we generated. 
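// Aside: FoldSymbolOffset above folds (add Sym, c) / (sub Sym, c) into a
// single symbol reference by absorbing the constant into the symbol's offset,
// routing the SUB's negation through uint64_t so INT64_MIN stays
// well-defined. The offset arithmetic over a toy symbol type:
#include <cassert>
#include <cstdint>

struct SymRef { int Sym; int64_t Offset; };

void foldSymbolOffset(SymRef &S, int64_t C, bool IsSub) {
  if (IsSub)
    C = (int64_t)(-(uint64_t)C);   // negate without signed-overflow UB
  S.Offset = (int64_t)((uint64_t)S.Offset + (uint64_t)C);
}

int main() {
  SymRef S{1, 16};
  foldSymbolOffset(S, 4, /*IsSub=*/true);
  assert(S.Offset == 12);
}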
- return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs); + return getBuildVector(VT, SDLoc(), Outputs); } -SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL, - EVT VT, +SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, + const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops, const SDNodeFlags *Flags) { // If the opcode is a target-specific ISD node, there's nothing we can @@ -3355,8 +3387,8 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL, auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) { BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op); - return (Op.getOpcode() == ISD::UNDEF) || - (Op.getOpcode() == ISD::CONDCODE) || (BV && BV->isConstant()); + return (Op.isUndef()) || (Op.getOpcode() == ISD::CONDCODE) || + (BV && BV->isConstant()); }; // All operands must be vector types with the same number of elements as @@ -3375,7 +3407,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL, EVT LegalSVT = VT.getScalarType(); if (LegalSVT.isInteger()) { LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); - if (LegalSVT.bitsLT(SVT)) + if (LegalSVT.bitsLT(VT.getScalarType())) return SDValue(); } @@ -3414,20 +3446,18 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL, ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult); // Scalar folding only succeeded if the result is a constant or UNDEF. - if (ScalarResult.getOpcode() != ISD::UNDEF && - ScalarResult.getOpcode() != ISD::Constant && + if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant && ScalarResult.getOpcode() != ISD::ConstantFP) return SDValue(); ScalarResults.push_back(ScalarResult); } - assert(ScalarResults.size() == NumElts && - "Unexpected number of scalar results for BUILD_VECTOR"); - return getNode(ISD::BUILD_VECTOR, DL, VT, ScalarResults); + return getBuildVector(VT, DL, ScalarResults); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, - SDValue N2, const SDNodeFlags *Flags) { +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, + SDValue N1, SDValue N2, + const SDNodeFlags *Flags) { ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); @@ -3617,14 +3647,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, }; if (N1C) { - APInt Val = N1C->getAPIntValue(); + const APInt &Val = N1C->getAPIntValue(); return SignExtendInReg(Val); } if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) { SmallVector<SDValue, 8> Ops; for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { SDValue Op = N1.getOperand(i); - if (Op.getOpcode() == ISD::UNDEF) { + if (Op.isUndef()) { Ops.push_back(getUNDEF(VT.getScalarType())); continue; } @@ -3637,13 +3667,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, break; } if (Ops.size() == VT.getVectorNumElements()) - return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + return getBuildVector(VT, DL, Ops); } break; } case ISD::EXTRACT_VECTOR_ELT: // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF. - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) return getUNDEF(VT); // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF @@ -3802,7 +3832,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } // Canonicalize an UNDEF to the RHS, even over a constant. 
- if (N1.getOpcode() == ISD::UNDEF) { + if (N1.isUndef()) { if (isCommutativeBinOp(Opcode)) { std::swap(N1, N2); } else { @@ -3831,10 +3861,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } // Fold a bunch of operators when the RHS is undef. - if (N2.getOpcode() == ISD::UNDEF) { + if (N2.isUndef()) { switch (Opcode) { case ISD::XOR: - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) // Handle undef ^ undef -> 0 special case. This is a common // idiom (misuse). return getConstant(0, DL, VT); @@ -3877,21 +3907,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } // Memoize this node if possible. - BinarySDNode *N; + SDNode *N; SDVTList VTs = getVTList(VT); if (VT != MVT::Glue) { SDValue Ops[] = {N1, N2}; FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { if (Flags) E->intersectFlagsWith(Flags); return SDValue(E, 0); } N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags); - CSEMap.InsertNode(N, IP); } else { N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags); @@ -3901,7 +3930,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, SDValue N2, SDValue N3) { // Perform various simplifications. switch (Opcode) { @@ -3982,36 +4011,35 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, // Memoize node if it doesn't produce a flag. SDNode *N; SDVTList VTs = getVTList(VT); + SDValue Ops[] = {N1, N2, N3}; if (VT != MVT::Glue) { - SDValue Ops[] = { N1, N2, N3 }; FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) return SDValue(E, 0); - N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTs, N1, N2, N3); + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTs, N1, N2, N3); + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); } InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, - SDValue N1, SDValue N2, SDValue N3, - SDValue N4) { +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, + SDValue N1, SDValue N2, SDValue N3, SDValue N4) { SDValue Ops[] = { N1, N2, N3, N4 }; return getNode(Opcode, DL, VT, Ops); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, - SDValue N1, SDValue N2, SDValue N3, - SDValue N4, SDValue N5) { +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, + SDValue N1, SDValue N2, SDValue N3, SDValue N4, + SDValue N5) { SDValue Ops[] = { N1, N2, N3, N4, N5 }; return getNode(Opcode, DL, VT, Ops); } @@ -4041,8 +4069,8 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { /// getMemsetValue - Vectorized representation of the memset value /// operand. 
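// Aside: this stretch canonicalizes an undef operand of a commutative op to
// the RHS and then folds: most operators with an undef RHS are themselves
// undef, but xor gets a special case because two undefs need not take the
// same value, so undef ^ undef is pinned to 0. In miniature, with nullopt
// modelling undef:
#include <cassert>
#include <optional>

using Val = std::optional<int>;

Val foldWithUndefRHS(bool IsXor, Val LHS) {
  if (IsXor && !LHS)
    return 0;              // undef ^ undef -> 0 (common idiom / misuse)
  return std::nullopt;     // otherwise the result stays undef
}

int main() {
  assert(foldWithUndefRHS(true, std::nullopt) == Val(0));
  assert(!foldWithUndefRHS(false, 5).has_value());
}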
static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, - SDLoc dl) { - assert(Value.getOpcode() != ISD::UNDEF); + const SDLoc &dl) { + assert(!Value.isUndef()); unsigned NumBits = VT.getScalarType().getSizeInBits(); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { @@ -4069,13 +4097,9 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, } if (VT != Value.getValueType() && !VT.isInteger()) - Value = DAG.getNode(ISD::BITCAST, dl, VT.getScalarType(), Value); - if (VT != Value.getValueType()) { - assert(VT.getVectorElementType() == Value.getValueType() && - "value type should be one vector element here"); - SmallVector<SDValue, 8> BVOps(VT.getVectorNumElements(), Value); - Value = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BVOps); - } + Value = DAG.getBitcast(VT.getScalarType(), Value); + if (VT != Value.getValueType()) + Value = DAG.getSplatBuildVector(VT, dl, Value); return Value; } @@ -4083,7 +4107,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, /// getMemsetStringVal - Similar to getMemsetValue. Except this is only /// used when a memcpy is turned into a memset when the source is a constant /// string ptr. -static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, +static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG, const TargetLowering &TLI, StringRef Str) { // Handle vector with all elements zero. if (Str.empty()) { @@ -4124,19 +4148,16 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, return SDValue(nullptr, 0); } -/// getMemBasePlusOffset - Returns base and offset node for the -/// -static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, SDLoc dl, - SelectionDAG &DAG) { +SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, unsigned Offset, + const SDLoc &DL) { EVT VT = Base.getValueType(); - return DAG.getNode(ISD::ADD, dl, - VT, Base, DAG.getConstant(Offset, dl, VT)); + return getNode(ISD::ADD, DL, VT, Base, getConstant(Offset, DL, VT)); } /// isMemSrcFromString - Returns true if memcpy source is a string constant. /// static bool isMemSrcFromString(SDValue Src, StringRef &Str) { - unsigned SrcDelta = 0; + uint64_t SrcDelta = 0; GlobalAddressSDNode *G = nullptr; if (Src.getOpcode() == ISD::GlobalAddress) G = cast<GlobalAddressSDNode>(Src); @@ -4149,7 +4170,8 @@ static bool isMemSrcFromString(SDValue Src, StringRef &Str) { if (!G) return false; - return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false); + return getConstantStringInfo(G->getGlobal(), Str, + SrcDelta + G->getOffset(), false); } /// Determines the optimal series of memory ops to replace the memset / memcpy. 
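// Aside: for a constant memset byte, getMemsetValue above amounts to
// broadcasting that byte across the wider store type (conceptually a multiply
// by 0x0101...01). The broadcast by repeated shift-or:
#include <cassert>
#include <cstdint>

uint64_t splatByte(uint8_t B, unsigned Bytes) {
  uint64_t V = 0;
  for (unsigned i = 0; i != Bytes; ++i)
    V = (V << 8) | B;     // shift prior copies up, append another
  return V;
}

int main() {
  assert(splatByte(0xAB, 4) == 0xABABABABULL);
}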
@@ -4163,6 +4185,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, bool ZeroMemset, bool MemcpyStrSrc, bool AllowOverlap, + unsigned DstAS, unsigned SrcAS, SelectionDAG &DAG, const TargetLowering &TLI) { assert((SrcAlign == 0 || SrcAlign >= DstAlign) && @@ -4179,10 +4202,9 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, DAG.getMachineFunction()); if (VT == MVT::Other) { - unsigned AS = 0; - if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(AS) || - TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign)) { - VT = TLI.getPointerTy(DAG.getDataLayout()); + if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(DstAS) || + TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) { + VT = TLI.getPointerTy(DAG.getDataLayout(), DstAS); } else { switch (DstAlign & 7) { case 0: VT = MVT::i64; break; @@ -4238,10 +4260,9 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, // FIXME: Only does this for 64-bit or more since we don't have proper // cost model for unaligned load / store. bool Fast; - unsigned AS = 0; if (NumMemOps && AllowOverlap && VTSize >= 8 && NewVTSize < Size && - TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign, &Fast) && Fast) + TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && Fast) VTSize = Size; else { VT = NewVT; @@ -4267,15 +4288,14 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { return MF.getFunction()->optForSize(); } -static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, - SDValue Chain, SDValue Dst, - SDValue Src, uint64_t Size, - unsigned Align, bool isVol, - bool AlwaysInline, +static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + uint64_t Size, unsigned Align, + bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { // Turn a memcpy of undef to nop. - if (Src.getOpcode() == ISD::UNDEF) + if (Src.isUndef()) return Chain; // Expand memcpy to a series of load and store ops if the size operand falls @@ -4302,7 +4322,10 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), (isZeroStr ? 0 : SrcAlign), - false, false, CopyFromStr, true, DAG, TLI)) + false, false, CopyFromStr, true, + DstPtrInfo.getAddrSpace(), + SrcPtrInfo.getAddrSpace(), + DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -4325,6 +4348,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, } } + MachineMemOperand::Flags MMOFlags = + isVol ? 
MachineMemOperand::MOVolatile : MachineMemOperand::MONone; SmallVector<SDValue, 8> OutChains; unsigned NumMemOps = MemOps.size(); uint64_t SrcOff = 0, DstOff = 0; @@ -4351,9 +4376,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff)); if (Value.getNode()) Store = DAG.getStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, dl, DAG), - DstPtrInfo.getWithOffset(DstOff), isVol, - false, Align); + DAG.getMemBasePlusOffset(Dst, DstOff, dl), + DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags); } if (!Store.getNode()) { @@ -4365,13 +4389,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); assert(NVT.bitsGE(VT)); Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, - getMemBasePlusOffset(Src, SrcOff, dl, DAG), - SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false, - false, MinAlign(SrcAlign, SrcOff)); - Store = DAG.getTruncStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, dl, DAG), - DstPtrInfo.getWithOffset(DstOff), VT, isVol, - false, Align); + DAG.getMemBasePlusOffset(Src, SrcOff, dl), + SrcPtrInfo.getWithOffset(SrcOff), VT, + MinAlign(SrcAlign, SrcOff), MMOFlags); + OutChains.push_back(Value.getValue(1)); + Store = DAG.getTruncStore( + Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), + DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags); } OutChains.push_back(Store); SrcOff += VTSize; @@ -4382,15 +4406,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } -static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, - SDValue Chain, SDValue Dst, - SDValue Src, uint64_t Size, - unsigned Align, bool isVol, - bool AlwaysInline, +static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + uint64_t Size, unsigned Align, + bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { // Turn a memmove of undef to nop. - if (Src.getOpcode() == ISD::UNDEF) + if (Src.isUndef()) return Chain; // Expand memmove to a series of load and store ops if the size operand falls @@ -4411,7 +4434,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), SrcAlign, - false, false, false, false, DAG, TLI)) + false, false, false, false, + DstPtrInfo.getAddrSpace(), + SrcPtrInfo.getAddrSpace(), + DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -4425,6 +4451,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, } } + MachineMemOperand::Flags MMOFlags = + isVol ? 
MachineMemOperand::MOVolatile : MachineMemOperand::MONone; uint64_t SrcOff = 0, DstOff = 0; SmallVector<SDValue, 8> LoadValues; SmallVector<SDValue, 8> LoadChains; @@ -4435,10 +4463,9 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value; - Value = DAG.getLoad(VT, dl, Chain, - getMemBasePlusOffset(Src, SrcOff, dl, DAG), - SrcPtrInfo.getWithOffset(SrcOff), isVol, - false, false, SrcAlign); + Value = + DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), + SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, MMOFlags); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -4451,8 +4478,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, SDValue Store; Store = DAG.getStore(Chain, dl, LoadValues[i], - getMemBasePlusOffset(Dst, DstOff, dl, DAG), - DstPtrInfo.getWithOffset(DstOff), isVol, false, Align); + DAG.getMemBasePlusOffset(Dst, DstOff, dl), + DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags); OutChains.push_back(Store); DstOff += VTSize; } @@ -4478,13 +4505,12 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, /// The function tries to replace 'llvm.memset' intrinsic with several store /// operations and value calculation code. This is usually profitable for small /// memory size. -static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, - SDValue Chain, SDValue Dst, - SDValue Src, uint64_t Size, - unsigned Align, bool isVol, +static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + uint64_t Size, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo) { // Turn a memset of undef to nop. - if (Src.getOpcode() == ISD::UNDEF) + if (Src.isUndef()) return Chain; // Expand memset to a series of load/store ops if the size operand @@ -4502,7 +4528,9 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, (DstAlignCanChange ? 0 : Align), 0, - true, IsZeroVal, false, true, DAG, TLI)) + true, IsZeroVal, false, true, + DstPtrInfo.getAddrSpace(), ~0u, + DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -4548,10 +4576,10 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, Value = getMemsetValue(Src, VT, DAG, dl); } assert(Value.getValueType() == VT && "Value with wrong type."); - SDValue Store = DAG.getStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, dl, DAG), - DstPtrInfo.getWithOffset(DstOff), - isVol, false, Align); + SDValue Store = DAG.getStore( + Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), + DstPtrInfo.getWithOffset(DstOff), Align, + isVol ? 
MachineMemOperand::MOVolatile : MachineMemOperand::MONone); OutChains.push_back(Store); DstOff += VT.getSizeInBits() / 8; Size -= VTSize; @@ -4570,10 +4598,10 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, } } -SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, - SDValue Src, SDValue Size, - unsigned Align, bool isVol, bool AlwaysInline, - bool isTailCall, MachinePointerInfo DstPtrInfo, +SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, + SDValue Src, SDValue Size, unsigned Align, + bool isVol, bool AlwaysInline, bool isTailCall, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4632,10 +4660,10 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, CLI.setDebugLoc(dl) .setChain(Chain) .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY), - Type::getVoidTy(*getContext()), + Dst.getValueType().getTypeForEVT(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), TLI->getPointerTy(getDataLayout())), - std::move(Args), 0) + std::move(Args)) .setDiscardResult() .setTailCall(isTailCall); @@ -4643,9 +4671,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, return CallResult.second; } -SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, - SDValue Src, SDValue Size, - unsigned Align, bool isVol, bool isTailCall, +SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, + SDValue Src, SDValue Size, unsigned Align, + bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4693,10 +4721,10 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, CLI.setDebugLoc(dl) .setChain(Chain) .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE), - Type::getVoidTy(*getContext()), + Dst.getValueType().getTypeForEVT(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), TLI->getPointerTy(getDataLayout())), - std::move(Args), 0) + std::move(Args)) .setDiscardResult() .setTailCall(isTailCall); @@ -4704,9 +4732,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, return CallResult.second; } -SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, - SDValue Src, SDValue Size, - unsigned Align, bool isVol, bool isTailCall, +SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, + SDValue Src, SDValue Size, unsigned Align, + bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4755,10 +4783,10 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, CLI.setDebugLoc(dl) .setChain(Chain) .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), - Type::getVoidTy(*getContext()), + Dst.getValueType().getTypeForEVT(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), TLI->getPointerTy(getDataLayout())), - std::move(Args), 0) + std::move(Args)) .setDiscardResult() .setTailCall(isTailCall); @@ -4766,7 +4794,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, return CallResult.second; } -SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTList, ArrayRef<SDValue> Ops, MachineMemOperand *MMO, AtomicOrdering 
SuccessOrdering, @@ -4777,41 +4805,31 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, AddNodeIDNode(ID, Opcode, VTList, Ops); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - // Allocate the operands array for the node out of the BumpPtrAllocator, since - // SDNode doesn't have access to it. This memory will be "leaked" when - // the node is deallocated, but recovered when the allocator is released. - // If the number of operands is less than 5 we use AtomicSDNode's internal - // storage. - unsigned NumOps = Ops.size(); - SDUse *DynOps = NumOps > 4 ? OperandAllocator.Allocate<SDUse>(NumOps) - : nullptr; - - SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), - dl.getDebugLoc(), VTList, MemVT, - Ops.data(), DynOps, NumOps, MMO, - SuccessOrdering, FailureOrdering, - SynchScope); + auto *N = newSDNode<AtomicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), + VTList, MemVT, MMO, SuccessOrdering, + FailureOrdering, SynchScope); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTList, ArrayRef<SDValue> Ops, - MachineMemOperand *MMO, - AtomicOrdering Ordering, + MachineMemOperand *MMO, AtomicOrdering Ordering, SynchronizationScope SynchScope) { return getAtomic(Opcode, dl, MemVT, VTList, Ops, MMO, Ordering, Ordering, SynchScope); } SDValue SelectionDAG::getAtomicCmpSwap( - unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs, SDValue Chain, + unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) { @@ -4826,10 +4844,8 @@ SDValue SelectionDAG::getAtomicCmpSwap( // FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. 
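// Aside: the newSDNode<T>(...) + createOperands(N, Ops) pairing used here
// (and throughout the patch) replaces per-subclass placement-new with
// DAG-owned operand storage. A sketch, assuming newSDNode is a variadic
// forwarding helper over the node allocator:
//
//   template <typename SDNodeT, typename... ArgTypes>
//   SDNodeT *SelectionDAG::newSDNode(ArgTypes &&... Args) {
//     return new (NodeAllocator.template Allocate<SDNodeT>())
//         SDNodeT(std::forward<ArgTypes>(Args)...);
//   }
//
// createOperands(N, Ops) then attaches an SDUse array sized to Ops (and
// recyclable on deletion, cf. removeOperands in MorphNodeTo further down),
// which is why AtomicSDNode no longer needs its DynOps/inline-storage dance.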
- unsigned Flags = MachineMemOperand::MOVolatile; - Flags |= MachineMemOperand::MOLoad; - Flags |= MachineMemOperand::MOStore; - + auto Flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad | + MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); @@ -4837,9 +4853,9 @@ SDValue SelectionDAG::getAtomicCmpSwap( SuccessOrdering, FailureOrdering, SynchScope); } -SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, SDLoc dl, EVT MemVT, - SDVTList VTs, SDValue Chain, SDValue Ptr, - SDValue Cmp, SDValue Swp, +SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, + EVT MemVT, SDVTList VTs, SDValue Chain, + SDValue Ptr, SDValue Cmp, SDValue Swp, MachineMemOperand *MMO, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, @@ -4853,11 +4869,9 @@ SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, SDLoc dl, EVT MemVT, SuccessOrdering, FailureOrdering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, - SDValue Chain, - SDValue Ptr, SDValue Val, - const Value* PtrVal, - unsigned Alignment, +SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, + SDValue Chain, SDValue Ptr, SDValue Val, + const Value *PtrVal, unsigned Alignment, AtomicOrdering Ordering, SynchronizationScope SynchScope) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 @@ -4870,7 +4884,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, // chained as such. // FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. - unsigned Flags = MachineMemOperand::MOVolatile; + auto Flags = MachineMemOperand::MOVolatile; if (Opcode != ISD::ATOMIC_STORE) Flags |= MachineMemOperand::MOLoad; if (Opcode != ISD::ATOMIC_LOAD) @@ -4884,11 +4898,9 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, - SDValue Chain, - SDValue Ptr, SDValue Val, - MachineMemOperand *MMO, - AtomicOrdering Ordering, +SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, + SDValue Chain, SDValue Ptr, SDValue Val, + MachineMemOperand *MMO, AtomicOrdering Ordering, SynchronizationScope SynchScope) { assert((Opcode == ISD::ATOMIC_LOAD_ADD || Opcode == ISD::ATOMIC_LOAD_SUB || @@ -4912,11 +4924,9 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, - EVT VT, SDValue Chain, - SDValue Ptr, - MachineMemOperand *MMO, - AtomicOrdering Ordering, +SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, + EVT VT, SDValue Chain, SDValue Ptr, + MachineMemOperand *MMO, AtomicOrdering Ordering, SynchronizationScope SynchScope) { assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op"); @@ -4926,7 +4936,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, } /// getMergeValues - Create a MERGE_VALUES node from the given operands. 
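// Aside: per the FIXMEs above, atomic orderings are not yet modeled on the
// MachineMemOperand, so every atomic conservatively gets MOVolatile plus
// whichever of load/store its opcode implies:
//
//   auto Flags = MachineMemOperand::MOVolatile;
//   if (Opcode != ISD::ATOMIC_STORE)
//     Flags |= MachineMemOperand::MOLoad;  // all but a plain store may read
//   if (Opcode != ISD::ATOMIC_LOAD)
//     Flags |= MachineMemOperand::MOStore; // all but a plain load may write
//
// so cmpxchg and the RMW opcodes end up volatile | load | store.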
-SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, SDLoc dl) { +SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) { if (Ops.size() == 1) return Ops[0]; @@ -4937,17 +4947,15 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, SDLoc dl) { return getNode(ISD::MERGE_VALUES, dl, getVTList(VTs), Ops); } -SDValue -SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, - ArrayRef<SDValue> Ops, - EVT MemVT, MachinePointerInfo PtrInfo, - unsigned Align, bool Vol, - bool ReadMem, bool WriteMem, unsigned Size) { +SDValue SelectionDAG::getMemIntrinsicNode( + unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, + EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol, + bool ReadMem, bool WriteMem, unsigned Size) { if (Align == 0) // Ensure that codegen never sees alignment 0 Align = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - unsigned Flags = 0; + auto Flags = MachineMemOperand::MONone; if (WriteMem) Flags |= MachineMemOperand::MOStore; if (ReadMem) @@ -4962,10 +4970,10 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO); } -SDValue -SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, - ArrayRef<SDValue> Ops, EVT MemVT, - MachineMemOperand *MMO) { +SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, + SDVTList VTList, + ArrayRef<SDValue> Ops, EVT MemVT, + MachineMemOperand *MMO) { assert((Opcode == ISD::INTRINSIC_VOID || Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::PREFETCH || @@ -4982,19 +4990,20 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, AddNodeIDNode(ID, Opcode, VTList, Ops); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), - dl.getDebugLoc(), VTList, Ops, - MemVT, MMO); - CSEMap.InsertNode(N, IP); + N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), + VTList, MemVT, MMO); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), - dl.getDebugLoc(), VTList, Ops, - MemVT, MMO); + N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), + VTList, MemVT, MMO); + createOperands(N, Ops); } InsertNode(N); return SDValue(N, 0); @@ -5032,50 +5041,40 @@ static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, // If the 'Offset' value isn't a constant, we can't handle this. 
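// Aside: "auto Flags = MachineMemOperand::MONone" only works because the
// flags are a genuine enumeration with overloaded bit operators, not a bare
// unsigned. A self-contained sketch of the bitmask-enum pattern (member
// values illustrative, not the real ones):
//
//   enum Flags : uint16_t {
//     MONone = 0,
//     MOLoad = 1u << 0,
//     MOStore = 1u << 1,
//     MOVolatile = 1u << 2,
//     MONonTemporal = 1u << 3,
//     MOInvariant = 1u << 4,
//   };
//   constexpr Flags operator|(Flags A, Flags B) {
//     return Flags(uint16_t(A) | uint16_t(B));
//   }
//   inline Flags &operator|=(Flags &A, Flags B) { return A = A | B; }
//
// "Flags |= MachineMemOperand::MOStore" then stays typed end to end, and
// accidentally mixing in an unrelated integer no longer compiles.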
if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp)) return InferPointerInfo(DAG, Ptr, OffsetNode->getSExtValue()); - if (OffsetOp.getOpcode() == ISD::UNDEF) + if (OffsetOp.isUndef()) return InferPointerInfo(DAG, Ptr); return MachinePointerInfo(); } - -SDValue -SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, - EVT VT, SDLoc dl, SDValue Chain, - SDValue Ptr, SDValue Offset, - MachinePointerInfo PtrInfo, EVT MemVT, - bool isVolatile, bool isNonTemporal, bool isInvariant, - unsigned Alignment, const AAMDNodes &AAInfo, - const MDNode *Ranges) { +SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, SDValue Offset, + MachinePointerInfo PtrInfo, EVT MemVT, + unsigned Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, const MDNode *Ranges) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(VT); - unsigned Flags = MachineMemOperand::MOLoad; - if (isVolatile) - Flags |= MachineMemOperand::MOVolatile; - if (isNonTemporal) - Flags |= MachineMemOperand::MONonTemporal; - if (isInvariant) - Flags |= MachineMemOperand::MOInvariant; - + MMOFlags |= MachineMemOperand::MOLoad; + assert((MMOFlags & MachineMemOperand::MOStore) == 0); // If we don't have a PtrInfo, infer the trivial frame index case to simplify // clients. if (PtrInfo.V.isNull()) PtrInfo = InferPointerInfo(*this, Ptr, Offset); MachineFunction &MF = getMachineFunction(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, - AAInfo, Ranges); + MachineMemOperand *MMO = MF.getMachineMemOperand( + PtrInfo, MMOFlags, MemVT.getStoreSize(), Alignment, AAInfo, Ranges); return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } -SDValue -SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, - EVT VT, SDLoc dl, SDValue Chain, - SDValue Ptr, SDValue Offset, EVT MemVT, - MachineMemOperand *MMO) { +SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, SDValue Offset, EVT MemVT, + MachineMemOperand *MMO) { if (VT == MemVT) { ExtType = ISD::NON_EXTLOAD; } else if (ExtType == ISD::NON_EXTLOAD) { @@ -5094,8 +5093,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, } bool Indexed = AM != ISD::UNINDEXED; - assert((Indexed || Offset.getOpcode() == ISD::UNDEF) && - "Unindexed load with an offset!"); + assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!"); SDVTList VTs = Indexed ? 
getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other); @@ -5108,100 +5106,90 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<LoadSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl.getIROrder(), - dl.getDebugLoc(), VTs, AM, ExtType, - MemVT, MMO); + auto *N = newSDNode<LoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, + ExtType, MemVT, MMO); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, - SDValue Chain, SDValue Ptr, - MachinePointerInfo PtrInfo, - bool isVolatile, bool isNonTemporal, - bool isInvariant, unsigned Alignment, - const AAMDNodes &AAInfo, - const MDNode *Ranges) { +SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, MachinePointerInfo PtrInfo, + unsigned Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, const MDNode *Ranges) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, - PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment, - AAInfo, Ranges); + PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges); } -SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, - SDValue Chain, SDValue Ptr, - MachineMemOperand *MMO) { +SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, MachineMemOperand *MMO) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, VT, MMO); } -SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, - SDValue Chain, SDValue Ptr, +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, + EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, - bool isVolatile, bool isNonTemporal, - bool isInvariant, unsigned Alignment, + unsigned Alignment, + MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo) { SDValue Undef = getUNDEF(Ptr.getValueType()); - return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, - PtrInfo, MemVT, isVolatile, isNonTemporal, isInvariant, - Alignment, AAInfo); + return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, PtrInfo, + MemVT, Alignment, MMOFlags, AAInfo); } - -SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, - SDValue Chain, SDValue Ptr, EVT MemVT, +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, + EVT VT, SDValue Chain, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, MemVT, MMO); } -SDValue -SelectionDAG::getIndexedLoad(SDValue OrigLoad, SDLoc dl, SDValue Base, - SDValue Offset, ISD::MemIndexedMode AM) { +SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { LoadSDNode *LD = cast<LoadSDNode>(OrigLoad); - assert(LD->getOffset().getOpcode() == ISD::UNDEF && - "Load is already a indexed load!"); + assert(LD->getOffset().isUndef() && "Load is already a indexed load!"); + // Don't propagate the invariant flag. 
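// Aside: a call-site migration sketch for the new getLoad surface (operand
// values illustrative; assumes the trailing MMOFlags parameter is defaulted
// to MONone in the header):
//
//   // Before: positional booleans, easy to transpose.
//   // DAG.getLoad(VT, dl, Chain, Ptr, PtrInfo,
//   //             /*isVolatile=*/false, /*isNonTemporal=*/true,
//   //             /*isInvariant=*/false, Align);
//   // After: one typed flags argument.
//   SDValue L = DAG.getLoad(VT, dl, Chain, Ptr, PtrInfo, Align,
//                           MachineMemOperand::MONonTemporal);
//
// getIndexedLoad just below shows the other payoff: flags can now be copied
// from an existing memoperand and adjusted (here, masking off MOInvariant).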
+ auto MMOFlags = + LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant; return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, LD->getChain(), Base, Offset, LD->getPointerInfo(), - LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), - false, LD->getAlignment()); + LD->getMemoryVT(), LD->getAlignment(), MMOFlags); } -SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, +SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, - bool isVolatile, bool isNonTemporal, - unsigned Alignment, const AAMDNodes &AAInfo) { - assert(Chain.getValueType() == MVT::Other && - "Invalid chain type"); + unsigned Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo) { + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(Val.getValueType()); - unsigned Flags = MachineMemOperand::MOStore; - if (isVolatile) - Flags |= MachineMemOperand::MOVolatile; - if (isNonTemporal) - Flags |= MachineMemOperand::MONonTemporal; + MMOFlags |= MachineMemOperand::MOStore; + assert((MMOFlags & MachineMemOperand::MOLoad) == 0); if (PtrInfo.V.isNull()) PtrInfo = InferPointerInfo(*this, Ptr); MachineFunction &MF = getMachineFunction(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrInfo, Flags, - Val.getValueType().getStoreSize(), Alignment, - AAInfo); - + MachineMemOperand *MMO = MF.getMachineMemOperand( + PtrInfo, MMOFlags, Val.getValueType().getStoreSize(), Alignment, AAInfo); return getStore(Chain, dl, Val, Ptr, MMO); } -SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, +SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachineMemOperand *MMO) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); @@ -5216,46 +5204,42 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), - dl.getDebugLoc(), VTs, - ISD::UNINDEXED, false, VT, MMO); + auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, + ISD::UNINDEXED, false, VT, MMO); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, +SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, - EVT SVT,bool isVolatile, bool isNonTemporal, - unsigned Alignment, + EVT SVT, unsigned Alignment, + MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(SVT); - unsigned Flags = MachineMemOperand::MOStore; - if (isVolatile) - Flags |= MachineMemOperand::MOVolatile; - if (isNonTemporal) - Flags |= MachineMemOperand::MONonTemporal; + MMOFlags |= MachineMemOperand::MOStore; + assert((MMOFlags & MachineMemOperand::MOLoad) == 0); if (PtrInfo.V.isNull()) PtrInfo = InferPointerInfo(*this, Ptr); MachineFunction &MF = getMachineFunction(); - 
MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment, - AAInfo); - + MachineMemOperand *MMO = MF.getMachineMemOperand( + PtrInfo, MMOFlags, SVT.getStoreSize(), Alignment, AAInfo); return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); } -SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, +SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, EVT SVT, MachineMemOperand *MMO) { EVT VT = Val.getValueType(); @@ -5285,24 +5269,24 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), - dl.getDebugLoc(), VTs, - ISD::UNINDEXED, true, SVT, MMO); + auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, + ISD::UNINDEXED, true, SVT, MMO); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue -SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base, - SDValue Offset, ISD::MemIndexedMode AM) { +SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { StoreSDNode *ST = cast<StoreSDNode>(OrigStore); - assert(ST->getOffset().getOpcode() == ISD::UNDEF && - "Store is already a indexed store!"); + assert(ST->getOffset().isUndef() && "Store is already a indexed store!"); SDVTList VTs = getVTList(Base.getValueType(), MVT::Other); SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset }; FoldingSetNodeID ID; @@ -5311,23 +5295,23 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base, ID.AddInteger(ST->getRawSubclassData()); ID.AddInteger(ST->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), - dl.getDebugLoc(), VTs, AM, - ST->isTruncatingStore(), - ST->getMemoryVT(), - ST->getMemOperand()); + auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, + ST->isTruncatingStore(), ST->getMemoryVT(), + ST->getMemOperand()); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue -SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, - SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT, - MachineMemOperand *MMO, ISD::LoadExtType ExtTy) { +SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, SDValue Mask, SDValue Src0, + EVT MemVT, MachineMemOperand *MMO, + ISD::LoadExtType ExtTy) { SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = { Chain, Ptr, Mask, Src0 }; @@ -5340,21 +5324,23 @@ SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<MaskedLoadSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(), - dl.getDebugLoc(), Ops, 4, VTs, 
- ExtTy, MemVT, MMO); + auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, + ExtTy, MemVT, MMO); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val, - SDValue Ptr, SDValue Mask, EVT MemVT, - MachineMemOperand *MMO, bool isTrunc) { +SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, + SDValue Val, SDValue Ptr, SDValue Mask, + EVT MemVT, MachineMemOperand *MMO, + bool isTrunc) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); EVT VT = Val.getValueType(); @@ -5367,22 +5353,23 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val, MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<MaskedStoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(), - dl.getDebugLoc(), Ops, 4, - VTs, isTrunc, MemVT, MMO); + auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, + isTrunc, MemVT, MMO); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue -SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl, - ArrayRef<SDValue> Ops, - MachineMemOperand *MMO) { +SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, + ArrayRef<SDValue> Ops, + MachineMemOperand *MMO) { + assert(Ops.size() == 5 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); @@ -5393,21 +5380,34 @@ SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl, MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<MaskedGatherSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - MaskedGatherSDNode *N = - new (NodeAllocator) MaskedGatherSDNode(dl.getIROrder(), dl.getDebugLoc(), - Ops, VTs, VT, MMO); + + auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), + VTs, VT, MMO); + createOperands(N, Ops); + + assert(N->getValue().getValueType() == N->getValueType(0) && + "Incompatible type of the PassThru value in MaskedGatherSDNode"); + assert(N->getMask().getValueType().getVectorNumElements() == + N->getValueType(0).getVectorNumElements() && + "Vector width mismatch between mask and data"); + assert(N->getIndex().getValueType().getVectorNumElements() == + N->getValueType(0).getVectorNumElements() && + "Vector width mismatch between index and data"); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, SDLoc dl, +SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO) { + assert(Ops.size() == 5 && "Incompatible number of operands"); + FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); ID.AddInteger(VT.getRawBits()); @@ -5416,27 +5416,33 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, SDLoc dl, MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + if (SDNode *E = 
FindNodeOrInsertPos(ID, dl, IP)) { cast<MaskedScatterSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = - new (NodeAllocator) MaskedScatterSDNode(dl.getIROrder(), dl.getDebugLoc(), - Ops, VTs, VT, MMO); + auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), + VTs, VT, MMO); + createOperands(N, Ops); + + assert(N->getMask().getValueType().getVectorNumElements() == + N->getValue().getValueType().getVectorNumElements() && + "Vector width mismatch between mask and data"); + assert(N->getIndex().getValueType().getVectorNumElements() == + N->getValue().getValueType().getVectorNumElements() && + "Vector width mismatch between index and data"); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl, - SDValue Chain, SDValue Ptr, - SDValue SV, - unsigned Align) { +SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, SDValue SV, unsigned Align) { SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) }; return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef<SDUse> Ops) { switch (Ops.size()) { case 0: return getNode(Opcode, DL, VT); @@ -5452,7 +5458,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, return getNode(Opcode, DL, VT, NewOps); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops, const SDNodeFlags *Flags) { unsigned NumOps = Ops.size(); switch (NumOps) { @@ -5498,27 +5504,28 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) return SDValue(E, 0); - N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), - VTs, Ops); + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), - VTs, Ops); + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); } InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) { return getNode(Opcode, DL, getVTList(ResultTys), Ops); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, ArrayRef<SDValue> Ops) { if (VTList.NumVTs == 1) return getNode(Opcode, DL, VTList.VTs[0], Ops); @@ -5548,83 +5555,56 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, // Memoize the node unless it returns a flag. 
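// Aside: the 1/2/3-operand special cases deleted below existed only to
// reach UnarySDNode/BinarySDNode/TernarySDNode, whose sole purpose was
// fixed inline SDUse storage. With operands attached via createOperands,
// every arity funnels through one generic path:
//
//   N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList);
//   createOperands(N, Ops);  // array sized to Ops.size(), whatever that is
//
// which is also why the patch can drop those three subclasses entirely
// (see the AddrSpaceCastSDNode base-class change near the end).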
SDNode *N; - unsigned NumOps = Ops.size(); if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) return SDValue(E, 0); - if (NumOps == 1) { - N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTList, Ops[0]); - } else if (NumOps == 2) { - N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTList, Ops[0], - Ops[1]); - } else if (NumOps == 3) { - N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTList, Ops[0], - Ops[1], Ops[2]); - } else { - N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), - VTList, Ops); - } + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList); + createOperands(N, Ops); CSEMap.InsertNode(N, IP); } else { - if (NumOps == 1) { - N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTList, Ops[0]); - } else if (NumOps == 2) { - N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTList, Ops[0], - Ops[1]); - } else if (NumOps == 3) { - N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTList, Ops[0], - Ops[1], Ops[2]); - } else { - N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), - VTList, Ops); - } + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList); + createOperands(N, Ops); } InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList) { +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, + SDVTList VTList) { return getNode(Opcode, DL, VTList, None); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N1) { SDValue Ops[] = { N1 }; return getNode(Opcode, DL, VTList, Ops); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N1, SDValue N2) { SDValue Ops[] = { N1, N2 }; return getNode(Opcode, DL, VTList, Ops); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3) { SDValue Ops[] = { N1, N2, N3 }; return getNode(Opcode, DL, VTList, Ops); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, - SDValue N1, SDValue N2, SDValue N3, - SDValue N4) { +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, SDValue N4) { SDValue Ops[] = { N1, N2, N3, N4 }; return getNode(Opcode, DL, VTList, Ops); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, - SDValue N1, SDValue N2, SDValue N3, - SDValue N4, SDValue N5) { +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, SDValue N4, + SDValue N5) { SDValue Ops[] = { N1, N2, N3, N4, N5 }; return getNode(Opcode, DL, VTList, Ops); } @@ -5932,10 +5912,14 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDVTList VTs,ArrayRef<SDValue> Ops) { - N = MorphNodeTo(N, ~MachineOpc, VTs, Ops); + SDNode 
*New = MorphNodeTo(N, ~MachineOpc, VTs, Ops); // Reset the NodeID to -1. - N->setNodeId(-1); - return N; + New->setNodeId(-1); + if (New != N) { + ReplaceAllUsesWith(N, New); + RemoveDeadNode(N); + } + return New; } /// UpdadeSDLocOnMergedSDNode - If the opt level is -O0 then it throws away @@ -5945,7 +5929,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, /// probability having other instructions associated with that line. /// /// For IROrder, we keep the smaller of the two -SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) { +SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, const SDLoc &OLoc) { DebugLoc NLoc = N->getDebugLoc(); if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) { N->setDebugLoc(DebugLoc()); @@ -5973,13 +5957,12 @@ SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) { /// deleting things. SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef<SDValue> Ops) { - unsigned NumOps = Ops.size(); // If an identical node already exists, use it. void *IP = nullptr; if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops); - if (SDNode *ON = FindNodeOrInsertPos(ID, N->getDebugLoc(), IP)) + if (SDNode *ON = FindNodeOrInsertPos(ID, SDLoc(N), IP)) return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N)); } @@ -6002,36 +5985,13 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, DeadNodeSet.insert(Used); } - if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) { - // Initialize the memory references information. + // For MachineNode, initialize the memory references information. + if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) MN->setMemRefs(nullptr, nullptr); - // If NumOps is larger than the # of operands we can have in a - // MachineSDNode, reallocate the operand list. - if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) { - if (MN->OperandsNeedDelete) - delete[] MN->OperandList; - if (NumOps > array_lengthof(MN->LocalOperands)) - // We're creating a final node that will live unmorphed for the - // remainder of the current SelectionDAG iteration, so we can allocate - // the operands directly out of a pool with no recycling metadata. - MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps), - Ops.data(), NumOps); - else - MN->InitOperands(MN->LocalOperands, Ops.data(), NumOps); - MN->OperandsNeedDelete = false; - } else - MN->InitOperands(MN->OperandList, Ops.data(), NumOps); - } else { - // If NumOps is larger than the # of operands we currently have, reallocate - // the operand list. - if (NumOps > N->NumOperands) { - if (N->OperandsNeedDelete) - delete[] N->OperandList; - N->InitOperands(new SDUse[NumOps], Ops.data(), NumOps); - N->OperandsNeedDelete = true; - } else - N->InitOperands(N->OperandList, Ops.data(), NumOps); - } + + // Swap for an appropriately sized array from the recycler. + removeOperands(N); + createOperands(N, Ops); // Delete any nodes that are still dead after adding the uses for the // new operands. @@ -6055,155 +6015,133 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, /// Note that getMachineNode returns the resultant node. If there is already a /// node of the specified opcode and operands, it returns that node instead of /// the current one. 
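// Aside: the SelectNodeTo change above fixes a CSE subtlety. MorphNodeTo
// first looks the morphed form up in the CSE maps and, on a hit, returns
// the pre-existing node instead of mutating N in place, so SelectNodeTo can
// no longer assume the returned node is N:
//
//   SDNode *New = MorphNodeTo(N, ~MachineOpc, VTs, Ops);
//   New->setNodeId(-1);           // reset the NodeID for later passes
//   if (New != N) {               // CSE hit: N itself was left untouched
//     ReplaceAllUsesWith(N, New); // rewire N's users onto the found node
//     RemoveDeadNode(N);          // then drop the now-dead original
//   }
//   return New;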
-MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT) { SDVTList VTs = getVTList(VT); return getMachineNode(Opcode, dl, VTs, None); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, SDValue Op1) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT, SDValue Op1) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1 }; return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, - SDValue Op1, SDValue Op2) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2 }; return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, - SDValue Op1, SDValue Op2, SDValue Op3) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT, SDValue Op1, SDValue Op2, + SDValue Op3) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2, Op3 }; return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, - ArrayRef<SDValue> Ops) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT, ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT); return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); return getMachineNode(Opcode, dl, VTs, None); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, - EVT VT1, EVT VT2, SDValue Op1) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1 }; return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, - EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, SDValue Op1, + SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, - EVT VT1, EVT VT2, SDValue Op1, - SDValue Op2, SDValue Op3) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, SDValue Op1, + SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2, Op3 }; return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, - EVT VT1, EVT VT2, - ArrayRef<SDValue> Ops) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2); return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, - EVT VT1, EVT VT2, EVT VT3, - SDValue Op1, SDValue Op2) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2) { SDVTList VTs = 
getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2 }; return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, - EVT VT1, EVT VT2, EVT VT3, - SDValue Op1, SDValue Op2, SDValue Op3) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2, + SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2, Op3 }; return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, - EVT VT1, EVT VT2, EVT VT3, - ArrayRef<SDValue> Ops) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, EVT VT3, + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3); return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, - EVT VT2, EVT VT3, EVT VT4, - ArrayRef<SDValue> Ops) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, EVT VT3, EVT VT4, + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, - ArrayRef<EVT> ResultTys, - ArrayRef<SDValue> Ops) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + ArrayRef<EVT> ResultTys, + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(ResultTys); return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, - ArrayRef<SDValue> OpsArray) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL, + SDVTList VTs, + ArrayRef<SDValue> Ops) { bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; MachineSDNode *N; void *IP = nullptr; - const SDValue *Ops = OpsArray.data(); - unsigned NumOps = OpsArray.size(); if (DoCSE) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ~Opcode, VTs, OpsArray); + AddNodeIDNode(ID, ~Opcode, VTs, Ops); IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { return cast<MachineSDNode>(UpdadeSDLocOnMergedSDNode(E, DL)); } } // Allocate a new MachineSDNode. - N = new (NodeAllocator) MachineSDNode(~Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTs); - - // Initialize the operands list. - if (NumOps > array_lengthof(N->LocalOperands)) - // We're creating a final node that will live unmorphed for the - // remainder of the current SelectionDAG iteration, so we can allocate - // the operands directly out of a pool with no recycling metadata. - N->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps), - Ops, NumOps); - else - N->InitOperands(N->LocalOperands, Ops, NumOps); - N->OperandsNeedDelete = false; + N = newSDNode<MachineSDNode>(~Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); if (DoCSE) CSEMap.InsertNode(N, IP); @@ -6214,9 +6152,8 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, /// getTargetExtractSubreg - A convenience function for creating /// TargetOpcode::EXTRACT_SUBREG nodes. 
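// Aside: on the mechanical SDLoc -> const SDLoc & change running through
// every overload here: SDLoc is a small value type (roughly a debug
// location plus an IR ordering index -- exact layout assumed), but these
// builders forward it through several layers per node created. Taking it by
// const reference drops a copy per hop and still binds temporaries at call
// sites:
//
//   SDValue V = DAG.getNode(ISD::ADD, SDLoc(N), VT, LHS, RHS);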
-SDValue -SelectionDAG::getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT, - SDValue Operand) { +SDValue SelectionDAG::getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, + SDValue Operand) { SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32); SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, Operand, SRIdxVal); @@ -6225,9 +6162,8 @@ SelectionDAG::getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT, /// getTargetInsertSubreg - A convenience function for creating /// TargetOpcode::INSERT_SUBREG nodes. -SDValue -SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT, - SDValue Operand, SDValue Subreg) { +SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, + SDValue Operand, SDValue Subreg) { SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32); SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT, Operand, Subreg, SRIdxVal); @@ -6243,7 +6179,7 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) { if (Flags) E->intersectFlagsWith(Flags); return E; @@ -6257,7 +6193,7 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, /// SDNode SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool IsIndirect, uint64_t Off, - DebugLoc DL, unsigned O) { + const DebugLoc &DL, unsigned O) { assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); return new (DbgInfo->getAlloc()) @@ -6267,7 +6203,7 @@ SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, /// Constant SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t Off, - DebugLoc DL, unsigned O) { + const DebugLoc &DL, unsigned O) { assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, Off, DL, O); @@ -6276,7 +6212,8 @@ SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr, /// FrameIndex SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t Off, - DebugLoc DL, unsigned O) { + const DebugLoc &DL, + unsigned O) { assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, Off, DL, O); @@ -6348,6 +6285,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { AddModifiedNodeToCSEMaps(User); } + // Preserve Debug Values + TransferDbgValues(FromN, To); + // If we just RAUW'd the root, take note. if (FromN == getRoot()) setRoot(To); @@ -6371,6 +6311,13 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) { if (From == To) return; + // Preserve Debug Info. Only do this if there's a use. + for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) + if (From->hasAnyUseOfValue(i)) { + assert((i < To->getNumValues()) && "Invalid To location"); + TransferDbgValues(SDValue(From, i), SDValue(To, i)); + } + // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. 
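// Aside: the TransferDbgValues calls newly threaded through each
// ReplaceAllUses* variant keep SDDbgValues alive across replacement;
// previously a dbg.value attached to the old node was silently dropped.
// Illustrative effect (node names hypothetical):
//
//   t5: i32 = add t1, t2      // SDDbgValue for "x" attached to t5
//   ReplaceAllUsesWith(t5, t9);
//   // -> the SDDbgValue is cloned onto t9, so "x" stays locatable
//
// The multi-result form transfers per result value, and only for results
// that actually have uses (hasAnyUseOfValue), matching result numbers one
// to one.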
   SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
@@ -6410,6 +6357,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
   if (From->getNumValues() == 1)  // Handle the simple case efficiently.
     return ReplaceAllUsesWith(SDValue(From, 0), To[0]);
 
+  // Preserve Debug Info.
+  for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+    TransferDbgValues(SDValue(From, i), *To);
+
   // Iterate over just the existing users of From. See the comments in
   // the ReplaceAllUsesWith above.
   SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
@@ -6454,6 +6405,9 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
     return;
   }
 
+  // Preserve Debug Info.
+  TransferDbgValues(From, To);
+
   // Iterate over just the existing users of From. See the comments in
   // the ReplaceAllUsesWith above.
   SDNode::use_iterator UI = From.getNode()->use_begin(),
@@ -6528,6 +6482,8 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
   if (Num == 1)
     return ReplaceAllUsesOfValueWith(*From, *To);
 
+  TransferDbgValues(*From, *To);
+
   // Read up all the uses and make records of them. This helps
   // processing new uses that are introduced during the
   // replacement process.
@@ -6628,7 +6584,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
       if (Degree == 0) {
         // All of P's operands are sorted, so P may sorted now.
         P->setNodeId(DAGSize++);
-        if (P != SortedPos)
+        if (P->getIterator() != SortedPos)
           SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
         assert(SortedPos != AllNodes.end() && "Overran node list");
         ++SortedPos;
@@ -6637,7 +6593,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
         P->setNodeId(Degree);
       }
     }
-    if (&Node == SortedPos) {
+    if (Node.getIterator() == SortedPos) {
 #ifndef NDEBUG
       allnodes_iterator I(N);
       SDNode *S = &*++I;
@@ -6676,28 +6632,26 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
   DbgInfo->add(DB, SD, isParameter);
 }
 
-/// TransferDbgValues - Transfer SDDbgValues.
+/// TransferDbgValues - Transfer SDDbgValues. Called in replace nodes.
 void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
   if (From == To || !From.getNode()->getHasDebugValue())
     return;
   SDNode *FromNode = From.getNode();
   SDNode *ToNode = To.getNode();
   ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode);
-  SmallVector<SDDbgValue *, 2> ClonedDVs;
   for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end();
        I != E; ++I) {
     SDDbgValue *Dbg = *I;
-    if (Dbg->getKind() == SDDbgValue::SDNODE) {
+    // Only add Dbgvalues attached to same ResNo.
+    if (Dbg->getKind() == SDDbgValue::SDNODE &&
+        Dbg->getResNo() == From.getResNo()) {
       SDDbgValue *Clone = getDbgValue(Dbg->getVariable(), Dbg->getExpression(),
                                       ToNode, To.getResNo(), Dbg->isIndirect(),
                                       Dbg->getOffset(), Dbg->getDebugLoc(),
                                       Dbg->getOrder());
-      ClonedDVs.push_back(Clone);
+      AddDbgValue(Clone, ToNode, false);
     }
   }
-  for (SmallVectorImpl<SDDbgValue *>::iterator I = ClonedDVs.begin(),
-       E = ClonedDVs.end(); I != E; ++I)
-    AddDbgValue(*I, ToNode, false);
 }
 
 //===----------------------------------------------------------------------===//
@@ -6724,26 +6678,31 @@ bool llvm::isOneConstant(SDValue V) {
   return Const != nullptr && Const->isOne();
 }
 
+bool llvm::isBitwiseNot(SDValue V) {
+  return V.getOpcode() == ISD::XOR && isAllOnesConstant(V.getOperand(1));
+}
+
 HandleSDNode::~HandleSDNode() {
   DropOperands();
 }
 
 GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order,
-                                         DebugLoc DL, const GlobalValue *GA,
-                                         EVT VT, int64_t o, unsigned char TF)
-  : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
+                                         const DebugLoc &DL,
+                                         const GlobalValue *GA, EVT VT,
+                                         int64_t o, unsigned char TF)
+    : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
   TheGlobal = GA;
 }
 
-AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, DebugLoc dl, EVT VT,
-                                         SDValue X, unsigned SrcAS,
+AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl,
+                                         EVT VT, unsigned SrcAS,
                                          unsigned DestAS)
-  : UnarySDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT), X),
-    SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {}
+    : SDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT)),
+      SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {}
 
-MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
-                     EVT memvt, MachineMemOperand *mmo)
- : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) {
+MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
+                     SDVTList VTs, EVT memvt, MachineMemOperand *mmo)
+    : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) {
   SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
                                       MMO->isNonTemporal(), MMO->isInvariant());
   assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
@@ -6755,16 +6714,6 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
   assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!");
 }
 
-MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
-                     ArrayRef<SDValue> Ops, EVT memvt, MachineMemOperand *mmo)
-   : SDNode(Opc, Order, dl, VTs, Ops),
-     MemoryVT(memvt), MMO(mmo) {
-  SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
-                                      MMO->isNonTemporal(), MMO->isInvariant());
-  assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
-  assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!");
-}
-
 /// Profile - Gather unique data for the node.
 ///
 void SDNode::Profile(FoldingSetNodeID &ID) const {
@@ -6894,44 +6843,13 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
   return false;
 }
 
-/// hasPredecessor - Return true if N is a predecessor of this node.
-/// N is either an operand of this node, or can be reached by recursively
-/// traversing up the operands.
-/// NOTE: This is an expensive method. Use it carefully.
 bool SDNode::hasPredecessor(const SDNode *N) const {
   SmallPtrSet<const SDNode *, 32> Visited;
   SmallVector<const SDNode *, 16> Worklist;
+  Worklist.push_back(this);
   return hasPredecessorHelper(N, Visited, Worklist);
 }
 
-bool
-SDNode::hasPredecessorHelper(const SDNode *N,
-                             SmallPtrSetImpl<const SDNode *> &Visited,
-                             SmallVectorImpl<const SDNode *> &Worklist) const {
-  if (Visited.empty()) {
-    Worklist.push_back(this);
-  } else {
-    // Take a look in the visited set. If we've already encountered this node
-    // we needn't search further.
-    if (Visited.count(N))
-      return true;
-  }
-
-  // Haven't visited N yet. Continue the search.
-  while (!Worklist.empty()) {
-    const SDNode *M = Worklist.pop_back_val();
-    for (const SDValue &OpV : M->op_values()) {
-      SDNode *Op = OpV.getNode();
-      if (Visited.insert(Op).second)
-        Worklist.push_back(Op);
-      if (Op == N)
-        return true;
-    }
-  }
-
-  return false;
-}
-
 uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
   assert(Num < NumOperands && "Invalid child # of SDNode!");
   return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
@@ -7018,12 +6936,14 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
                  EVT::getVectorVT(*getContext(), EltVT, ResNE), Scalars);
 }
 
-
-/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
-/// location that is 'Dist' units away from the location that the 'Base' load
-/// is loading from.
-bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
-                                     unsigned Bytes, int Dist) const {
+bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
+                                                  LoadSDNode *Base,
+                                                  unsigned Bytes,
+                                                  int Dist) const {
+  if (LD->isVolatile() || Base->isVolatile())
+    return false;
+  if (LD->isIndexed() || Base->isIndexed())
+    return false;
   if (LD->getChain() != Base->getChain())
     return false;
   EVT VT = LD->getValueType(0);
@@ -7204,7 +7124,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
     SDValue OpVal = getOperand(i);
     unsigned BitPos = j * EltBitSize;
 
-    if (OpVal.getOpcode() == ISD::UNDEF)
+    if (OpVal.isUndef())
       SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
     else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
       SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize).
@@ -7250,7 +7170,7 @@ SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
   SDValue Splatted;
   for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
     SDValue Op = getOperand(i);
-    if (Op.getOpcode() == ISD::UNDEF) {
+    if (Op.isUndef()) {
       if (UndefElements)
         (*UndefElements)[i] = true;
     } else if (!Splatted) {
@@ -7261,7 +7181,7 @@ SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
   }
 
   if (!Splatted) {
-    assert(getOperand(0).getOpcode() == ISD::UNDEF &&
+    assert(getOperand(0).isUndef() &&
            "Can only have a splat without a constant for all undefs.");
     return getOperand(0);
   }
@@ -7286,7 +7206,7 @@ BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
           dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) {
     bool IsExact;
     APSInt IntVal(BitWidth);
-    APFloat APF = CN->getValueAPF();
+    const APFloat &APF = CN->getValueAPF();
     if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) !=
             APFloat::opOK ||
         !IsExact)
@@ -7322,6 +7242,22 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
   return true;
 }
 
+// \brief Returns the SDNode if it is a constant integer BuildVector
+// or constant integer.
+SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) {
+  if (isa<ConstantSDNode>(N))
+    return N.getNode();
+  if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
+    return N.getNode();
+  // Treat a GlobalAddress supporting constant offset folding as a
+  // constant integer.
+  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N))
+    if (GA->getOpcode() == ISD::GlobalAddress &&
+        TLI->isOffsetFoldingLegal(GA))
+      return GA;
+  return nullptr;
+}
+
 #ifndef NDEBUG
 static void checkForCyclesHelper(const SDNode *N,
                                  SmallPtrSetImpl<const SDNode*> &Visited,
@@ -7353,9 +7289,9 @@ void llvm::checkForCycles(const llvm::SDNode *N, bool force) {
 #ifndef NDEBUG
   bool check = force;
-#ifdef XDEBUG
+#ifdef EXPENSIVE_CHECKS
   check = true;
-#endif  // XDEBUG
+#endif  // EXPENSIVE_CHECKS
   if (check) {
     assert(N && "Checking nonexistent SDNode");
     SmallPtrSet<const SDNode*, 32> visited;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 45ae39af7600d..e03282cad6b83 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -20,9 +20,11 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/GCMetadata.h"
@@ -34,6 +36,7 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/CodeGen/WinEHFuncInfo.h"
 #include "llvm/IR/CallingConv.h"
@@ -42,6 +45,7 @@
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
@@ -61,7 +65,6 @@
 #include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSelectionDAGInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <utility>
@@ -84,6 +87,19 @@ static cl::opt<bool>
 EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden,
                 cl::desc("Enable fast-math-flags for DAG nodes"));
 
+/// Minimum jump table density for normal functions.
+static cl::opt<unsigned>
+JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden,
+                 cl::desc("Minimum density for building a jump table in "
+                          "a normal function"));
+
+/// Minimum jump table density for -Os or -Oz functions.
+static cl::opt<unsigned>
+OptsizeJumpTableDensity("optsize-jump-table-density", cl::init(40), cl::Hidden,
+                        cl::desc("Minimum density for building a jump table in "
+                                 "an optsize function"));
+
+
 // Limit the width of DAG chains. This is important in general to prevent
 // DAG-based analysis from blowing up. For example, alias analysis and
 // load clustering may not complete in reasonable time. It is difficult to
@@ -94,26 +110,25 @@ EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden,
 // MaxParallelChains default is arbitrarily high to avoid affecting
 // optimization, but could be lowered to improve compile time. Any ld-ld-st-st
 // sequence over this should have been converted to llvm.memcpy by the
-// frontend. It easy to induce this behavior with .ll code such as:
+// frontend. It is easy to induce this behavior with .ll code such as:
 //   %buffer = alloca [4096 x i8]
 //   %data = load [4096 x i8]* %argPtr
 //   store [4096 x i8] %data, [4096 x i8]* %buffer
 static const unsigned MaxParallelChains = 64;
 
-static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                       const SDValue *Parts, unsigned NumParts,
                                       MVT PartVT, EVT ValueVT, const Value *V);
 
 /// getCopyFromParts - Create a value that contains the specified legal parts
 /// combined into the value they represent.  If the parts combine to a type
-/// larger then ValueVT then AssertOp can be used to specify whether the extra
+/// larger than ValueVT then AssertOp can be used to specify whether the extra
 /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
 /// (ISD::AssertSext).
-static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
-                                const SDValue *Parts,
-                                unsigned NumParts, MVT PartVT, EVT ValueVT,
-                                const Value *V,
-                                ISD::NodeType AssertOp = ISD::DELETED_NODE) {
+static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
+                                const SDValue *Parts, unsigned NumParts,
+                                MVT PartVT, EVT ValueVT, const Value *V,
+                                Optional<ISD::NodeType> AssertOp = None) {
   if (ValueVT.isVector())
     return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
                                   PartVT, ValueVT, V);
@@ -193,6 +208,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
   }
 
   // There is now one part, held in Val.  Correct it to match ValueVT.
+  // PartEVT is the type of the register class that holds the value.
+  // ValueVT is the type of the inline asm operation.
   EVT PartEVT = Val.getValueType();
 
   if (PartEVT == ValueVT)
@@ -206,13 +223,18 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
     Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
   }
 
+  // Handle types that have the same size.
+  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
+    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+  // Handle types with different sizes.
   if (PartEVT.isInteger() && ValueVT.isInteger()) {
     if (ValueVT.bitsLT(PartEVT)) {
       // For a truncate, see if we have any information to
       // indicate whether the truncated bits will always be
       // zero or sign-extension.
-      if (AssertOp != ISD::DELETED_NODE)
-        Val = DAG.getNode(AssertOp, DL, PartEVT, Val,
+      if (AssertOp.hasValue())
+        Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
                           DAG.getValueType(ValueVT));
       return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
     }
@@ -229,9 +251,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
     return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
   }
 
-  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
-    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
-
   llvm_unreachable("Unknown mismatch!");
 }
 
@@ -251,10 +270,10 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
 
 /// getCopyFromPartsVector - Create a value that contains the specified legal
 /// parts combined into the value they represent.  If the parts combine to a
-/// type larger then ValueVT then AssertOp can be used to specify whether the
+/// type larger than ValueVT then AssertOp can be used to specify whether the
 /// extra bits are known to be zero (ISD::AssertZext) or sign extended from
 /// ValueVT (ISD::AssertSext).
-static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                       const SDValue *Parts, unsigned NumParts,
                                       MVT PartVT, EVT ValueVT, const Value *V) {
   assert(ValueVT.isVector() && "Not a vector value");
@@ -353,16 +372,16 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
   return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
 }
 
-static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc dl,
+static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
                                  SDValue Val, SDValue *Parts, unsigned NumParts,
                                  MVT PartVT, const Value *V);
 
 /// getCopyToParts - Create a series of nodes that contain the specified value
 /// split into legal parts.  If the parts contain more bits than Val, then, for
 /// integers, ExtendKind can be used to specify how to generate the extra bits.
-static void getCopyToParts(SelectionDAG &DAG, SDLoc DL,
-                           SDValue Val, SDValue *Parts, unsigned NumParts,
-                           MVT PartVT, const Value *V,
+static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
+                           SDValue *Parts, unsigned NumParts, MVT PartVT,
+                           const Value *V,
                            ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
   EVT ValueVT = Val.getValueType();
 
@@ -427,9 +446,11 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL,
          "Failed to tile the value with PartVT!");
 
   if (NumParts == 1) {
-    if (PartEVT != ValueVT)
+    if (PartEVT != ValueVT) {
       diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
                                         "scalar-to-vector conversion failed");
+      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+    }
 
     Parts[0] = Val;
     return;
@@ -489,7 +510,7 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL,
 
 /// getCopyToPartsVector - Create a series of nodes that contain the specified
 /// value split into legal parts.
-static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
+static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                  SDValue Val, SDValue *Parts, unsigned NumParts,
                                  MVT PartVT, const Value *V) {
   EVT ValueVT = Val.getValueType();
@@ -618,9 +639,8 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
 /// If the Flag pointer is NULL, no flag is used.
 SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
                                       FunctionLoweringInfo &FuncInfo,
-                                      SDLoc dl,
-                                      SDValue &Chain, SDValue *Flag,
-                                      const Value *V) const {
+                                      const SDLoc &dl, SDValue &Chain,
+                                      SDValue *Flag, const Value *V) const {
   // A Value with type {} or [0 x %t] needs no registers.
   if (ValueVTs.empty())
     return SDValue();
@@ -676,25 +696,33 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
       // now, just use the tightest assertzext/assertsext possible.
       bool isSExt = true;
       EVT FromVT(MVT::Other);
-      if (NumSignBits == RegSize)
-        isSExt = true, FromVT = MVT::i1;   // ASSERT SEXT 1
-      else if (NumZeroBits >= RegSize-1)
-        isSExt = false, FromVT = MVT::i1;  // ASSERT ZEXT 1
-      else if (NumSignBits > RegSize-8)
-        isSExt = true, FromVT = MVT::i8;   // ASSERT SEXT 8
-      else if (NumZeroBits >= RegSize-8)
-        isSExt = false, FromVT = MVT::i8;  // ASSERT ZEXT 8
-      else if (NumSignBits > RegSize-16)
-        isSExt = true, FromVT = MVT::i16;  // ASSERT SEXT 16
-      else if (NumZeroBits >= RegSize-16)
-        isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
-      else if (NumSignBits > RegSize-32)
-        isSExt = true, FromVT = MVT::i32;  // ASSERT SEXT 32
-      else if (NumZeroBits >= RegSize-32)
-        isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
-      else
+      if (NumSignBits == RegSize) {
+        isSExt = true;   // ASSERT SEXT 1
+        FromVT = MVT::i1;
+      } else if (NumZeroBits >= RegSize - 1) {
+        isSExt = false;  // ASSERT ZEXT 1
+        FromVT = MVT::i1;
+      } else if (NumSignBits > RegSize - 8) {
+        isSExt = true;   // ASSERT SEXT 8
+        FromVT = MVT::i8;
+      } else if (NumZeroBits >= RegSize - 8) {
+        isSExt = false;  // ASSERT ZEXT 8
+        FromVT = MVT::i8;
+      } else if (NumSignBits > RegSize - 16) {
+        isSExt = true;   // ASSERT SEXT 16
+        FromVT = MVT::i16;
+      } else if (NumZeroBits >= RegSize - 16) {
+        isSExt = false;  // ASSERT ZEXT 16
+        FromVT = MVT::i16;
+      } else if (NumSignBits > RegSize - 32) {
+        isSExt = true;   // ASSERT SEXT 32
+        FromVT = MVT::i32;
+      } else if (NumZeroBits >= RegSize - 32) {
+        isSExt = false;  // ASSERT ZEXT 32
+        FromVT = MVT::i32;
+      } else {
        continue;
-
+      }
       // Add an assertion node.
       assert(FromVT != MVT::Other);
       Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
@@ -714,8 +742,9 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
 /// specified value into the registers specified by this object. This uses
 /// Chain/Flag as the input and updates them for the output Chain/Flag.
 /// If the Flag pointer is NULL, no flag is used.
-void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
-                                 SDValue &Chain, SDValue *Flag, const Value *V,
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
                                 const SDLoc &dl, SDValue &Chain, SDValue *Flag,
+                                 const Value *V,
                                  ISD::NodeType PreferredExtendType) const {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   ISD::NodeType ExtendKind = PreferredExtendType;
@@ -770,7 +799,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
 /// operand list.  This adds the code marker and includes the number of
 /// values added into it.
 void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
-                                        unsigned MatchingIdx, SDLoc dl,
+                                        unsigned MatchingIdx, const SDLoc &dl,
                                         SelectionDAG &DAG,
                                         std::vector<SDValue> &Ops) const {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -902,10 +931,48 @@ SDValue SelectionDAGBuilder::getControlRoot() {
   return Root;
 }
 
+/// Copy swift error to the final virtual register at end of a basic block, as
+/// specified by SwiftErrorWorklist, if necessary.
+static void copySwiftErrorsToFinalVRegs(SelectionDAGBuilder &SDB) {
+  const TargetLowering &TLI = SDB.DAG.getTargetLoweringInfo();
+  if (!TLI.supportSwiftError())
+    return;
+
+  if (!SDB.FuncInfo.SwiftErrorWorklist.count(SDB.FuncInfo.MBB))
+    return;
+
+  // Go through entries in SwiftErrorWorklist, and create copy as necessary.
+  FunctionLoweringInfo::SwiftErrorVRegs &WorklistEntry =
+      SDB.FuncInfo.SwiftErrorWorklist[SDB.FuncInfo.MBB];
+  FunctionLoweringInfo::SwiftErrorVRegs &MapEntry =
+      SDB.FuncInfo.SwiftErrorMap[SDB.FuncInfo.MBB];
+  for (unsigned I = 0, E = WorklistEntry.size(); I < E; I++) {
+    unsigned WorkReg = WorklistEntry[I];
+
+    // Find the swifterror virtual register for the value in SwiftErrorMap.
+    unsigned MapReg = MapEntry[I];
+    assert(TargetRegisterInfo::isVirtualRegister(MapReg) &&
+           "Entries in SwiftErrorMap should be virtual registers");
+
+    if (WorkReg == MapReg)
+      continue;
+
+    // Create copy from SwiftErrorMap to SwiftWorklist.
+    auto &DL = SDB.DAG.getDataLayout();
+    SDValue CopyNode = SDB.DAG.getCopyToReg(
+        SDB.getRoot(), SDB.getCurSDLoc(), WorkReg,
+        SDB.DAG.getRegister(MapReg, EVT(TLI.getPointerTy(DL))));
+    MapEntry[I] = WorkReg;
+    SDB.DAG.setRoot(CopyNode);
+  }
+}
+
 void SelectionDAGBuilder::visit(const Instruction &I) {
   // Set up outgoing PHI node register values before emitting the terminator.
-  if (isa<TerminatorInst>(&I))
+  if (isa<TerminatorInst>(&I)) {
+    copySwiftErrorsToFinalVRegs(*this);
     HandlePHINodesInSuccessorBlocks(I.getParent());
+  }
 
   ++SDNodeOrder;
 
@@ -992,10 +1059,8 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
 
   // If there's a virtual register allocated and initialized for this
   // value, use it.
-  SDValue copyFromReg = getCopyFromRegs(V, V->getType());
-  if (copyFromReg.getNode()) {
+  if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
     return copyFromReg;
-  }
 
   // Otherwise create a new SDValue and remember it.
   SDValue Val = getValueImpl(V);
@@ -1206,7 +1271,7 @@ void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
   // This will be used by the FuncletLayout pass to determine how to order the
   // BB's.
   // A 'catchret' returns to the outer scope's color.
-  Value *ParentPad = I.getParentPad();
+  Value *ParentPad = I.getCatchSwitchParentPad();
   const BasicBlock *SuccessorColor;
   if (isa<ConstantTokenNone>(ParentPad))
     SuccessorColor = &FuncInfo.Fn->getEntryBlock();
@@ -1314,6 +1379,18 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
   SmallVector<ISD::OutputArg, 8> Outs;
   SmallVector<SDValue, 8> OutVals;
 
+  // Calls to @llvm.experimental.deoptimize don't generate a return value, so
+  // lower
+  //
+  //   %val = call <ty> @llvm.experimental.deoptimize()
+  //   ret <ty> %val
+  //
+  // differently.
+  if (I.getParent()->getTerminatingDeoptimizeCall()) {
+    LowerDeoptimizingReturn();
+    return;
+  }
+
   if (!FuncInfo.CanLowerReturn) {
     unsigned DemoteReg = FuncInfo.DemoteRegister;
     const Function *F = I.getParent()->getParent();
@@ -1346,11 +1423,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
                                  DAG.getIntPtrConstant(Offsets[i],
                                                        getCurSDLoc()),
                                  &Flags);
-      Chains[i] =
-        DAG.getStore(Chain, getCurSDLoc(),
-                     SDValue(RetOp.getNode(), RetOp.getResNo() + i),
-                     // FIXME: better loc info would be nice.
-                     Add, MachinePointerInfo(), false, false, 0);
+      Chains[i] = DAG.getStore(Chain, getCurSDLoc(),
+                               SDValue(RetOp.getNode(), RetOp.getResNo() + i),
+                               // FIXME: better loc info would be nice.
+                               Add, MachinePointerInfo());
     }
 
     Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
@@ -1380,7 +1456,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
         EVT VT = ValueVTs[j];
 
         if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
-          VT = TLI.getTypeForExtArgOrReturn(Context, VT, ExtendKind);
+          VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
 
         unsigned NumParts = TLI.getNumRegisters(Context, VT);
         MVT PartVT = TLI.getRegisterType(Context, VT);
@@ -1409,6 +1485,23 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
     }
   }
 
+  // Push in swifterror virtual register as the last element of Outs. This makes
+  // sure swifterror virtual register will be returned in the swifterror
+  // physical register.
+  const Function *F = I.getParent()->getParent();
+  if (TLI.supportSwiftError() &&
+      F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
+    ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+    Flags.setSwiftError();
+    Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
+                                  EVT(TLI.getPointerTy(DL)) /*argvt*/,
+                                  true /*isfixed*/, 1 /*origidx*/,
+                                  0 /*partOffs*/));
+    // Create SDNode for the swifterror virtual register.
+    OutVals.push_back(DAG.getRegister(FuncInfo.SwiftErrorMap[FuncInfo.MBB][0],
+                                      EVT(TLI.getPointerTy(DL))));
+  }
+
   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
   CallingConv::ID CallConv =
     DAG.getMachineFunction().getFunction()->getCallingConv();
@@ -1906,6 +1999,27 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
   DAG.setRoot(BrCond);
 }
 
+/// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
+/// variable if there exists one.
+static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
+                                 SDValue &Chain) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+  MachineFunction &MF = DAG.getMachineFunction();
+  Value *Global = TLI.getSDagStackGuard(*MF.getFunction()->getParent());
+  MachineSDNode *Node =
+      DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
+  if (Global) {
+    MachinePointerInfo MPInfo(Global);
+    MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
+    auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+    *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8,
+                                       DAG.getEVTAlignment(PtrTy));
+    Node->setMemRefs(MemRefs, MemRefs + 1);
+  }
+  return SDValue(Node, 0);
+}
+
 /// Codegen a new tail for a stack protector check ParentMBB which has had its
 /// tail spliced into a stack protector check success bb.
 ///
@@ -1922,32 +2036,59 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
   MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo();
   int FI = MFI->getStackProtectorIndex();
 
-  const Value *IRGuard = SPD.getGuard();
-  SDValue GuardPtr = getValue(IRGuard);
+  SDValue Guard;
+  SDLoc dl = getCurSDLoc();
   SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
+  const Module &M = *ParentBB->getParent()->getFunction()->getParent();
+  unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));
 
-  unsigned Align = DL->getPrefTypeAlignment(IRGuard->getType());
+  // Generate code to load the content of the guard slot.
+  SDValue StackSlot = DAG.getLoad(
+      PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
+      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
+      MachineMemOperand::MOVolatile);
+
+  // Retrieve guard check function, nullptr if instrumentation is inlined.
+  if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
+    // The target provides a guard check function to validate the guard value.
+    // Generate a call to that function with the content of the guard slot as
+    // argument.
+    auto *Fn = cast<Function>(GuardCheck);
+    FunctionType *FnTy = Fn->getFunctionType();
+    assert(FnTy->getNumParams() == 1 && "Invalid function signature");
 
-  SDValue Guard;
-  SDLoc dl = getCurSDLoc();
+    TargetLowering::ArgListTy Args;
+    TargetLowering::ArgListEntry Entry;
+    Entry.Node = StackSlot;
+    Entry.Ty = FnTy->getParamType(0);
+    if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
+      Entry.isInReg = true;
+    Args.push_back(Entry);
 
-  // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the
-  // guard value from the virtual register holding the value. Otherwise, emit a
-  // volatile load to retrieve the stack guard value.
-  unsigned GuardReg = SPD.getGuardReg();
+    TargetLowering::CallLoweringInfo CLI(DAG);
+    CLI.setDebugLoc(getCurSDLoc())
+      .setChain(DAG.getEntryNode())
+      .setCallee(Fn->getCallingConv(), FnTy->getReturnType(),
+                 getValue(GuardCheck), std::move(Args));
 
-  if (GuardReg && TLI.useLoadStackGuardNode())
-    Guard = DAG.getCopyFromReg(DAG.getEntryNode(), dl, GuardReg,
-                               PtrTy);
-  else
-    Guard = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(),
-                        GuardPtr, MachinePointerInfo(IRGuard, 0),
-                        true, false, false, Align);
+    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
+    DAG.setRoot(Result.second);
+    return;
+  }
 
-  SDValue StackSlot = DAG.getLoad(
-      PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
-      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), true,
-      false, false, Align);
+  // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
+  // Otherwise, emit a volatile load to retrieve the stack guard value.
+  SDValue Chain = DAG.getEntryNode();
+  if (TLI.useLoadStackGuardNode()) {
+    Guard = getLoadStackGuard(DAG, dl, Chain);
+  } else {
+    const Value *IRGuard = TLI.getSDagStackGuard(M);
+    SDValue GuardPtr = getValue(IRGuard);
+
+    Guard =
+        DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0),
+                    Align, MachineMemOperand::MOVolatile);
+  }
 
   // Perform the comparison via a subtract/getsetcc.
   EVT VT = Guard.getValueType();
@@ -2115,6 +2256,12 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
   MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
   const BasicBlock *EHPadBB = I.getSuccessor(1);
 
+  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
+  // have to do anything here to lower funclet bundles.
+  assert(!I.hasOperandBundlesOtherThan(
+             {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
+         "Cannot lower invokes with arbitrary operand bundles yet!");
+
   const Value *Callee(I.getCalledValue());
   const Function *Fn = dyn_cast<Function>(Callee);
   if (isa<InlineAsm>(Callee))
@@ -2134,8 +2281,15 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
       LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
       break;
     }
-  } else
+  } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
+    // Currently we do not lower any intrinsic calls with deopt operand bundles.
+    // Eventually we will support lowering the @llvm.experimental.deoptimize
+    // intrinsic, and right now there are no plans to support other intrinsics
+    // with deopt state.
+    LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
+  } else {
     LowerCallTo(&I, getValue(Callee), false, EHPadBB);
+  }
 
   // If the value of the invoke is used outside of its defining block, make it
   // available as a virtual register.
@@ -2309,6 +2463,129 @@ void SelectionDAGBuilder::visitFSub(const User &I) {
   visitBinary(I, ISD::FSUB);
 }
 
+/// Checks if the given instruction performs a vector reduction, in which case
+/// we have the freedom to alter the elements in the result as long as the
+/// reduction of them stays unchanged.
+static bool isVectorReductionOp(const User *I) {
+  const Instruction *Inst = dyn_cast<Instruction>(I);
+  if (!Inst || !Inst->getType()->isVectorTy())
+    return false;
+
+  auto OpCode = Inst->getOpcode();
+  switch (OpCode) {
+  case Instruction::Add:
+  case Instruction::Mul:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+    break;
+  case Instruction::FAdd:
+  case Instruction::FMul:
+    if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
+      if (FPOp->getFastMathFlags().unsafeAlgebra())
+        break;
+    // Fall through.
+  default:
+    return false;
+  }
+
+  unsigned ElemNum = Inst->getType()->getVectorNumElements();
+  unsigned ElemNumToReduce = ElemNum;
+
+  // Do DFS search on the def-use chain from the given instruction. We only
+  // allow four kinds of operations during the search until we reach the
+  // instruction that extracts the first element from the vector:
+  //
+  //   1. The reduction operation of the same opcode as the given instruction.
+  //
+  //   2. PHI node.
+  //
+  //   3. ShuffleVector instruction together with a reduction operation that
+  //      does a partial reduction.
+  //
+  //   4. ExtractElement that extracts the first element from the vector, and we
+  //      stop searching the def-use chain here.
+  //
+  // 3 & 4 above perform a reduction on all elements of the vector. We push defs
+  // from 1-3 to the stack to continue the DFS. The given instruction is not
+  // a reduction operation if we meet any other instructions other than those
+  // listed above.
+
+  SmallVector<const User *, 16> UsersToVisit{Inst};
+  SmallPtrSet<const User *, 16> Visited;
+  bool ReduxExtracted = false;
+
+  while (!UsersToVisit.empty()) {
+    auto User = UsersToVisit.back();
+    UsersToVisit.pop_back();
+    if (!Visited.insert(User).second)
+      continue;
+
+    for (const auto &U : User->users()) {
+      auto Inst = dyn_cast<Instruction>(U);
+      if (!Inst)
+        return false;
+
+      if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
+        if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
+          if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().unsafeAlgebra())
+            return false;
+        UsersToVisit.push_back(U);
+      } else if (const ShuffleVectorInst *ShufInst =
+                     dyn_cast<ShuffleVectorInst>(U)) {
+        // Detect the following pattern: A ShuffleVector instruction together
+        // with a reduction that do partial reduction on the first and second
+        // ElemNumToReduce / 2 elements, and store the result in
+        // ElemNumToReduce / 2 elements in another vector.
+
+        unsigned ResultElements = ShufInst->getType()->getVectorNumElements();
+        if (ResultElements < ElemNum)
+          return false;
+
+        if (ElemNumToReduce == 1)
+          return false;
+        if (!isa<UndefValue>(U->getOperand(1)))
+          return false;
+        for (unsigned i = 0; i < ElemNumToReduce / 2; ++i)
+          if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2))
+            return false;
+        for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i)
+          if (ShufInst->getMaskValue(i) != -1)
+            return false;
+
+        // There is only one user of this ShuffleVector instruction, which
+        // must be a reduction operation.
+        if (!U->hasOneUse())
+          return false;
+
+        auto U2 = dyn_cast<Instruction>(*U->user_begin());
+        if (!U2 || U2->getOpcode() != OpCode)
+          return false;
+
+        // Check operands of the reduction operation.
+        if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) ||
+            (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) {
+          UsersToVisit.push_back(U2);
+          ElemNumToReduce /= 2;
+        } else
+          return false;
+      } else if (isa<ExtractElementInst>(U)) {
+        // At this moment we should have reduced all elements in the vector.
+        if (ElemNumToReduce != 1)
+          return false;
+
+        const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1));
+        if (!Val || Val->getZExtValue() != 0)
+          return false;
+
+        ReduxExtracted = true;
+      } else
+        return false;
+    }
+  }
+  return ReduxExtracted;
+}
+
 void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
   SDValue Op1 = getValue(I.getOperand(0));
   SDValue Op2 = getValue(I.getOperand(1));
 
   bool nuw = false;
   bool nsw = false;
   bool exact = false;
+  bool vec_redux = false;
   FastMathFlags FMF;
 
   if (const OverflowingBinaryOperator *OFBinOp =
@@ -2329,10 +2607,16 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
   if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I))
     FMF = FPOp->getFastMathFlags();
 
+  if (isVectorReductionOp(&I)) {
+    vec_redux = true;
+    DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
+  }
+
   SDNodeFlags Flags;
   Flags.setExact(exact);
   Flags.setNoSignedWrap(nsw);
   Flags.setNoUnsignedWrap(nuw);
+  Flags.setVectorReduction(vec_redux);
   if (EnableFMFInDAG) {
     Flags.setAllowReciprocal(FMF.allowReciprocal());
     Flags.setNoInfs(FMF.noInfs());
@@ -2433,7 +2717,7 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
   SDValue Op1 = getValue(I.getOperand(0));
   SDValue Op2 = getValue(I.getOperand(1));
   ISD::CondCode Condition = getFCmpCondCode(predicate);
-  
+
   // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them.
   // FIXME: We should propagate the fast-math-flags to the DAG node itself for
   // further optimization, but currently FMF is only applicable to binary nodes.
@@ -2444,6 +2728,14 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
   setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
 }
 
+// Check if the condition of the select has one use or two users that are both
+// selects with the same condition.
+static bool hasOnlySelectUsers(const Value *Cond) {
+  return std::all_of(Cond->user_begin(), Cond->user_end(), [](const Value *V) {
+    return isa<SelectInst>(V);
+  });
+}
+
 void SelectionDAGBuilder::visitSelect(const User &I) {
   SmallVector<EVT, 4> ValueVTs;
   ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
@@ -2529,7 +2821,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
         // If the underlying comparison instruction is used by any other
        // instruction, the consumed instructions won't be destroyed, so it is
         // not profitable to convert to a min/max.
-        cast<SelectInst>(&I)->getCondition()->hasOneUse()) {
+        hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) {
       OpCode = Opc;
       LHSVal = getValue(LHS);
       RHSVal = getValue(RHS);
@@ -2703,17 +2995,6 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
                            InVec, InIdx));
 }
 
-// Utility for visitShuffleVector - Return true if every element in Mask,
-// beginning from position Pos and ending in Pos+Size, falls within the
-// specified sequential range [L, L+Pos). or is undef.
-static bool isSequentialInRange(const SmallVectorImpl<int> &Mask,
-                                unsigned Pos, unsigned Size, int Low) {
-  for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
-    if (Mask[i] >= 0 && Mask[i] != Low)
-      return false;
-  return true;
-}
-
 void SelectionDAGBuilder::visitShuffleVector(const User &I) {
   SDValue Src1 = getValue(I.getOperand(0));
   SDValue Src2 = getValue(I.getOperand(1));
@@ -2728,8 +3009,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
   unsigned SrcNumElts = SrcVT.getVectorNumElements();
 
   if (SrcNumElts == MaskNumElts) {
-    setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2,
-                                      &Mask[0]));
+    setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, Mask));
     return;
   }
 
@@ -2738,29 +3018,46 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
     // Mask is longer than the source vectors and is a multiple of the source
     // vectors.  We can use concatenate vector to make the mask and vectors
     // lengths match.
-    if (SrcNumElts*2 == MaskNumElts) {
-      // First check for Src1 in low and Src2 in high
-      if (isSequentialInRange(Mask, 0, SrcNumElts, 0) &&
-          isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) {
-        // The shuffle is concatenating two vectors together.
-        setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(),
-                                 VT, Src1, Src2));
-        return;
+
+    unsigned NumConcat = MaskNumElts / SrcNumElts;
+
+    // Check if the shuffle is some kind of concatenation of the input vectors.
+    bool IsConcat = true;
+    SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
+    for (unsigned i = 0; i != MaskNumElts; ++i) {
+      int Idx = Mask[i];
+      if (Idx < 0)
+        continue;
+      // Ensure the indices in each SrcVT sized piece are sequential and that
+      // the same source is used for the whole piece.
+      if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
+          (ConcatSrcs[i / SrcNumElts] >= 0 &&
+           ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
+        IsConcat = false;
+        break;
       }
-      // Then check for Src2 in low and Src1 in high
-      if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) &&
-          isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) {
-        // The shuffle is concatenating two vectors together.
-        setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(),
-                                 VT, Src2, Src1));
-        return;
+      // Remember which source this index came from.
+      ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
+    }
+
+    // The shuffle is concatenating multiple vectors together. Just emit
+    // a CONCAT_VECTORS operation.
+    if (IsConcat) {
+      SmallVector<SDValue, 8> ConcatOps;
+      for (auto Src : ConcatSrcs) {
+        if (Src < 0)
+          ConcatOps.push_back(DAG.getUNDEF(SrcVT));
+        else if (Src == 0)
+          ConcatOps.push_back(Src1);
+        else
+          ConcatOps.push_back(Src2);
       }
+      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(),
+                               VT, ConcatOps));
+      return;
     }
 
     // Pad both vectors with undefs to make them the same length as the mask.
-    unsigned NumConcat = MaskNumElts / SrcNumElts;
-    bool Src1U = Src1.getOpcode() == ISD::UNDEF;
-    bool Src2U = Src2.getOpcode() == ISD::UNDEF;
     SDValue UndefVal = DAG.getUNDEF(SrcVT);
 
     SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
@@ -2768,10 +3065,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
     MOps1[0] = Src1;
     MOps2[0] = Src2;
 
-    Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
-                                                  getCurSDLoc(), VT, MOps1);
-    Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
-                                                  getCurSDLoc(), VT, MOps2);
+    Src1 = Src1.isUndef() ? DAG.getUNDEF(VT)
+                          : DAG.getNode(ISD::CONCAT_VECTORS,
+                                        getCurSDLoc(), VT, MOps1);
+    Src2 = Src2.isUndef() ? DAG.getUNDEF(VT)
+                          : DAG.getNode(ISD::CONCAT_VECTORS,
+                                        getCurSDLoc(), VT, MOps2);
 
     // Readjust mask for new input vector length.
     SmallVector<int, 8> MappedOps;
@@ -2783,7 +3082,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
     }
 
     setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2,
-                                      &MappedOps[0]));
+                                      MappedOps));
     return;
   }
 
@@ -2864,7 +3163,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
   }
 
   setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2,
-                                    &MappedOps[0]));
+                                    MappedOps));
   return;
 }
 }
@@ -2982,8 +3281,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
   Value *Op0 = I.getOperand(0);
   // Note that the pointer operand may be a vector of pointers. Take the scalar
   // element which holds a pointer.
-  Type *Ty = Op0->getType()->getScalarType();
-  unsigned AS = Ty->getPointerAddressSpace();
+  unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
   SDValue N = getValue(Op0);
   SDLoc dl = getCurSDLoc();
 
@@ -2993,14 +3291,15 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
       cast<VectorType>(I.getType())->getVectorNumElements() : 0;
 
   if (VectorWidth && !N.getValueType().isVector()) {
-    MVT VT = MVT::getVectorVT(N.getValueType().getSimpleVT(), VectorWidth);
+    LLVMContext &Context = *DAG.getContext();
+    EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
     SmallVector<SDValue, 16> Ops(VectorWidth, N);
     N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
   }
-  for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
-       OI != E; ++OI) {
-    const Value *Idx = *OI;
-    if (StructType *StTy = dyn_cast<StructType>(Ty)) {
+  for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
+       GTI != E; ++GTI) {
+    const Value *Idx = GTI.getOperand();
+    if (StructType *StTy = dyn_cast<StructType>(*GTI)) {
       unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
       if (Field) {
         // N = N + Offset
@@ -3015,14 +3314,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
         N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
                         DAG.getConstant(Offset, dl, N.getValueType()), &Flags);
       }
-
-      Ty = StTy->getElementType(Field);
     } else {
-      Ty = cast<SequentialType>(Ty)->getElementType();
       MVT PtrTy =
          DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS);
       unsigned PtrSize = PtrTy.getSizeInBits();
-      APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty));
+      APInt ElementSize(PtrSize, DL->getTypeAllocSize(GTI.getIndexedType()));
 
       // If this is a scalar constant or a splat vector of constants,
       // handle it quickly.
@@ -3055,7 +3351,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
       if (!IdxN.getValueType().isVector() && VectorWidth) {
         MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth);
         SmallVector<SDValue, 16> Ops(VectorWidth, IdxN);
-        IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+        IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
       }
 
       // If the index is smaller or larger than intptr_t, truncate or extend
@@ -3144,7 +3440,22 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
   if (I.isAtomic())
     return visitAtomicLoad(I);
 
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   const Value *SV = I.getOperand(0);
+  if (TLI.supportSwiftError()) {
+    // Swifterror values can come from either a function parameter with
+    // swifterror attribute or an alloca with swifterror attribute.
+    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
+      if (Arg->hasSwiftErrorAttr())
+        return visitLoadFromSwiftError(I);
+    }
+
+    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
+      if (Alloca->isSwiftError())
+        return visitLoadFromSwiftError(I);
+    }
+  }
+
   SDValue Ptr = getValue(SV);
 
   Type *Ty = I.getType();
@@ -3168,7 +3479,6 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
   I.getAAMetadata(AAInfo);
   const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
 
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SmallVector<EVT, 4> ValueVTs;
   SmallVector<uint64_t, 4> Offsets;
   ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets);
@@ -3223,10 +3533,17 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
                             PtrVT, Ptr,
                            DAG.getConstant(Offsets[i], dl, PtrVT),
                             &Flags);
-    SDValue L = DAG.getLoad(ValueVTs[i], dl, Root,
-                            A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
-                            isNonTemporal, isInvariant, Alignment, AAInfo,
-                            Ranges);
+    auto MMOFlags = MachineMemOperand::MONone;
+    if (isVolatile)
+      MMOFlags |= MachineMemOperand::MOVolatile;
+    if (isNonTemporal)
+      MMOFlags |= MachineMemOperand::MONonTemporal;
+    if (isInvariant)
+      MMOFlags |= MachineMemOperand::MOInvariant;
+
+    SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A,
+                            MachinePointerInfo(SV, Offsets[i]), Alignment,
+                            MMOFlags, AAInfo, Ranges);
 
     Values[i] = L;
     Chains[ChainI] = L.getValue(1);
@@ -3245,6 +3562,64 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
                            DAG.getVTList(ValueVTs), Values));
 }
 
+void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  assert(TLI.supportSwiftError() &&
+         "call visitStoreToSwiftError when backend supports swifterror");
+
+  SmallVector<EVT, 4> ValueVTs;
+  SmallVector<uint64_t, 4> Offsets;
+  const Value *SrcV = I.getOperand(0);
+  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
+                  SrcV->getType(), ValueVTs, &Offsets);
+  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
+         "expect a single EVT for swifterror");
+
+  SDValue Src = getValue(SrcV);
+  // Create a virtual register, then update the virtual register.
+  auto &DL = DAG.getDataLayout();
+  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
+  unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC);
+  // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
+  // Chain can be getRoot or getControlRoot.
+  SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
+                                      SDValue(Src.getNode(), Src.getResNo()));
+  DAG.setRoot(CopyNode);
+  FuncInfo.setSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
+}
+
+void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
+  assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
+         "call visitLoadFromSwiftError when backend supports swifterror");
+
+  assert(!I.isVolatile() &&
+         I.getMetadata(LLVMContext::MD_nontemporal) == nullptr &&
+         I.getMetadata(LLVMContext::MD_invariant_load) == nullptr &&
+         "Support volatile, non temporal, invariant for load_from_swift_error");
+
+  const Value *SV = I.getOperand(0);
+  Type *Ty = I.getType();
+  AAMDNodes AAInfo;
+  I.getAAMetadata(AAInfo);
+  assert(!AA->pointsToConstantMemory(MemoryLocation(
+             SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo)) &&
+         "load_from_swift_error should not be constant memory");
+
+  SmallVector<EVT, 4> ValueVTs;
+  SmallVector<uint64_t, 4> Offsets;
+  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
+                  ValueVTs, &Offsets);
+  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
+         "expect a single EVT for swifterror");
+
+  // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
+  SDValue L = DAG.getCopyFromReg(getRoot(), getCurSDLoc(),
+                                 FuncInfo.findSwiftErrorVReg(FuncInfo.MBB, SV),
+                                 ValueVTs[0]);
+
+  setValue(&I, L);
+}
+
 void SelectionDAGBuilder::visitStore(const StoreInst &I) {
   if (I.isAtomic())
     return visitAtomicStore(I);
@@ -3252,6 +3627,21 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
   const Value *SrcV = I.getOperand(0);
   const Value *PtrV = I.getOperand(1);
 
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (TLI.supportSwiftError()) {
+    // Swifterror values can come from either a function parameter with
+    // swifterror attribute or an alloca with swifterror attribute.
+    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
+      if (Arg->hasSwiftErrorAttr())
+        return visitStoreToSwiftError(I);
+    }
+
+    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
+      if (Alloca->isSwiftError())
+        return visitStoreToSwiftError(I);
+    }
+  }
+
   SmallVector<EVT, 4> ValueVTs;
   SmallVector<uint64_t, 4> Offsets;
   ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
@@ -3268,15 +3658,18 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
   SDValue Root = getRoot();
   SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
+  SDLoc dl = getCurSDLoc();
   EVT PtrVT = Ptr.getValueType();
-  bool isVolatile = I.isVolatile();
-  bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
   unsigned Alignment = I.getAlignment();
-  SDLoc dl = getCurSDLoc();
-
   AAMDNodes AAInfo;
   I.getAAMetadata(AAInfo);
 
+  auto MMOFlags = MachineMemOperand::MONone;
+  if (I.isVolatile())
+    MMOFlags |= MachineMemOperand::MOVolatile;
+  if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
+    MMOFlags |= MachineMemOperand::MONonTemporal;
+
   // An aggregate load cannot wrap around the address space, so offsets to its
   // parts don't wrap either.
   SDNodeFlags Flags;
@@ -3293,10 +3686,9 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
     }
     SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
                               DAG.getConstant(Offsets[i], dl, PtrVT), &Flags);
-    SDValue St = DAG.getStore(Root, dl,
-                              SDValue(Src.getNode(), Src.getResNo() + i),
-                              Add, MachinePointerInfo(PtrV, Offsets[i]),
-                              isVolatile, isNonTemporal, Alignment, AAInfo);
+    SDValue St = DAG.getStore(
+        Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add,
+        MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo);
     Chains[ChainI] = St;
   }
 
@@ -3447,13 +3839,10 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
   I.getAAMetadata(AAInfo);
   const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
 
-  SDValue InChain = DAG.getRoot();
-  if (AA->pointsToConstantMemory(MemoryLocation(
-          PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()),
-          AAInfo))) {
-    // Do not serialize (non-volatile) loads of constant memory with anything.
-    InChain = DAG.getEntryNode();
-  }
+  // Do not serialize masked loads of constant memory with anything.
+  bool AddToChain = !AA->pointsToConstantMemory(MemoryLocation(
+      PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo));
+  SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
 
   MachineMemOperand *MMO =
     DAG.getMachineFunction().
@@ -3463,8 +3852,10 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
 
   SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
                                    ISD::NON_EXTLOAD);
-  SDValue OutChain = Load.getValue(1);
-  DAG.setRoot(OutChain);
+  if (AddToChain) {
+    SDValue OutChain = Load.getValue(1);
+    DAG.setRoot(OutChain);
+  }
   setValue(&I, Load);
 }
 
@@ -3585,7 +3976,7 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue Ops[3];
   Ops[0] = getRoot();
-  Ops[1] = DAG.getConstant(I.getOrdering(), dl,
+  Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl,
                            TLI.getPointerTy(DAG.getDataLayout()));
   Ops[2] = DAG.getConstant(I.getSynchScope(), dl,
                            TLI.getPointerTy(DAG.getDataLayout()));
@@ -3724,7 +4115,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
     if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
       EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
       Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
-    }
+    } else
+      Result = lowerRangeToAssertZExt(DAG, I, Result);
 
     setValue(&I, Result);
   }
@@ -3736,8 +4128,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
 ///   Op = (Op & 0x007fffff) | 0x3f800000;
 ///
 /// where Op is the hexadecimal representation of floating point value.
-static SDValue
-GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) {
+static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
   SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
                            DAG.getConstant(0x007fffff, dl, MVT::i32));
   SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
@@ -3750,9 +4141,8 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) {
 ///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
 ///
 /// where Op is the hexadecimal representation of floating point value.
-static SDValue
-GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
-            SDLoc dl) {
+static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
+                           const TargetLowering &TLI, const SDLoc &dl) {
   SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
                            DAG.getConstant(0x7f800000, dl, MVT::i32));
   SDValue t1 = DAG.getNode(
@@ -3764,13 +4154,13 @@ GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
 }
 
 /// getF32Constant - Get 32-bit floating point constant.
-static SDValue
-getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) {
+static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
+                              const SDLoc &dl) {
   return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), dl,
                            MVT::f32);
 }
 
-static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl,
+static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
                                        SelectionDAG &DAG) {
   // TODO: What fast-math-flags should be set on the floating-point nodes?
 
@@ -3862,7 +4252,7 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl,
 
 /// expandExp - Lower an exp intrinsic. Handles the special sequences for
 /// limited-precision mode.
-static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
+static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI) {
   if (Op.getValueType() == MVT::f32 &&
       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
@@ -3885,9 +4275,9 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
 
 /// expandLog - Lower a log intrinsic. Handles the special sequences for
 /// limited-precision mode.
-static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
+static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI) {
-  
+
   // TODO: What fast-math-flags should be set on the floating-point nodes?
 
   if (Op.getValueType() == MVT::f32 &&
@@ -3984,9 +4374,9 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
 
 /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
 /// limited-precision mode.
-static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
+static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                           const TargetLowering &TLI) {
-  
+
  // TODO: What fast-math-flags should be set on the floating-point nodes?
 
   if (Op.getValueType() == MVT::f32 &&
@@ -4082,7 +4472,7 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
 
 /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
 /// limited-precision mode.
-static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
+static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                            const TargetLowering &TLI) {
   // TODO: What fast-math-flags should be set on the floating-point nodes?
 
@@ -4173,7 +4563,7 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
 
 /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
 /// limited-precision mode.
-static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
+static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                           const TargetLowering &TLI) {
   if (Op.getValueType() == MVT::f32 &&
       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
@@ -4185,7 +4575,7 @@ static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
 
 /// visitPow - Lower a pow intrinsic. Handles the special sequences for
 /// limited-precision mode with x == 10.0f.
-static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, +static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const TargetLowering &TLI) { bool IsExp10 = false; if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 && @@ -4214,7 +4604,7 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, /// ExpandPowI - Expand a llvm.powi intrinsic. -static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, +static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, SelectionDAG &DAG) { // If RHS is a constant, we can expand this out to a multiplication tree, // otherwise we end up lowering to a call to __powidf2 (for example). When @@ -4798,7 +5188,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::trunc: case Intrinsic::rint: case Intrinsic::nearbyint: - case Intrinsic::round: { + case Intrinsic::round: + case Intrinsic::canonicalize: { unsigned Opcode; switch (Intrinsic) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. @@ -4812,6 +5203,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; case Intrinsic::round: Opcode = ISD::FROUND; break; + case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break; } setValue(&I, DAG.getNode(Opcode, sdl, @@ -4819,18 +5211,28 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)))); return nullptr; } - case Intrinsic::minnum: - setValue(&I, DAG.getNode(ISD::FMINNUM, sdl, - getValue(I.getArgOperand(0)).getValueType(), + case Intrinsic::minnum: { + auto VT = getValue(I.getArgOperand(0)).getValueType(); + unsigned Opc = + I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT) + ? ISD::FMINNAN + : ISD::FMINNUM; + setValue(&I, DAG.getNode(Opc, sdl, VT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); return nullptr; - case Intrinsic::maxnum: - setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl, - getValue(I.getArgOperand(0)).getValueType(), + } + case Intrinsic::maxnum: { + auto VT = getValue(I.getArgOperand(0)).getValueType(); + unsigned Opc = + I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT) + ? ISD::FMAXNAN + : ISD::FMAXNUM; + setValue(&I, DAG.getNode(Opc, sdl, VT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); return nullptr; + } case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), @@ -4954,47 +5356,35 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return nullptr; } + case Intrinsic::stackguard: { + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + MachineFunction &MF = DAG.getMachineFunction(); + const Module &M = *MF.getFunction()->getParent(); + SDValue Chain = getRoot(); + if (TLI.useLoadStackGuardNode()) { + Res = getLoadStackGuard(DAG, sdl, Chain); + } else { + const Value *Global = TLI.getSDagStackGuard(M); + unsigned Align = DL->getPrefTypeAlignment(Global->getType()); + Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global), + MachinePointerInfo(Global, 0), Align, + MachineMemOperand::MOVolatile); + } + DAG.setRoot(Chain); + setValue(&I, Res); + return nullptr; + } case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. 
MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); SDValue Src, Chain = getRoot(); - const Value *Ptr = cast<LoadInst>(I.getArgOperand(0))->getPointerOperand(); - const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr); - - // See if Ptr is a bitcast. If it is, look through it and see if we can get - // global variable __stack_chk_guard. - if (!GV) - if (const Operator *BC = dyn_cast<Operator>(Ptr)) - if (BC->getOpcode() == Instruction::BitCast) - GV = dyn_cast<GlobalVariable>(BC->getOperand(0)); - - if (GV && TLI.useLoadStackGuardNode()) { - // Emit a LOAD_STACK_GUARD node. - MachineSDNode *Node = DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, - sdl, PtrTy, Chain); - MachinePointerInfo MPInfo(GV); - MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1); - unsigned Flags = MachineMemOperand::MOLoad | - MachineMemOperand::MOInvariant; - *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, - PtrTy.getSizeInBits() / 8, - DAG.getEVTAlignment(PtrTy)); - Node->setMemRefs(MemRefs, MemRefs + 1); - - // Copy the guard value to a virtual register so that it can be - // retrieved in the epilogue. - Src = SDValue(Node, 0); - const TargetRegisterClass *RC = - TLI.getRegClassFor(Src.getSimpleValueType()); - unsigned Reg = MF.getRegInfo().createVirtualRegister(RC); - - SPDescriptor.setGuardReg(Reg); - Chain = DAG.getCopyToReg(Chain, sdl, Reg, Src); - } else { + + if (TLI.useLoadStackGuardNode()) + Src = getLoadStackGuard(DAG, sdl, Chain); + else Src = getValue(I.getArgOperand(0)); // The guard's value. - } AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); @@ -5006,7 +5396,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Store the stack protector onto the stack. 
Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), FI), - true, false, 0); + /* Alignment = */ 0, MachineMemOperand::MOVolatile); setValue(&I, Res); DAG.setRoot(Res); return nullptr; @@ -5060,15 +5450,20 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)))); return nullptr; } - case Intrinsic::gcroot: - if (GFI) { - const Value *Alloca = I.getArgOperand(0)->stripPointerCasts(); - const Constant *TypeMap = cast<Constant>(I.getArgOperand(1)); - - FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); - GFI->addStackRoot(FI->getIndex(), TypeMap); - } + case Intrinsic::gcroot: { + MachineFunction &MF = DAG.getMachineFunction(); + const Function *F = MF.getFunction(); + (void)F; + assert(F->hasGC() && + "only valid in functions with gc specified, enforced by Verifier"); + assert(GFI && "implied by previous"); + const Value *Alloca = I.getArgOperand(0)->stripPointerCasts(); + const Constant *TypeMap = cast<Constant>(I.getArgOperand(1)); + + FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); + GFI->addStackRoot(FI->getIndex(), TypeMap); return nullptr; + } case Intrinsic::gcread: case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); @@ -5101,7 +5496,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { CallingConv::C, I.getType(), DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args), 0); + std::move(Args)); std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); @@ -5193,18 +5588,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::invariant_end: // Discard region information. return nullptr; - case Intrinsic::stackprotectorcheck: { - // Do not actually emit anything for this basic block. Instead we initialize - // the stack protector descriptor and export the guard variable so we can - // access it in FinishBasicBlock. - const BasicBlock *BB = I.getParent(); - SPDescriptor.initialize(BB, FuncInfo.MBBMap[BB], I); - ExportFromCurrentBlock(SPDescriptor.getGuard()); - - // Flush our exports since we are going to process a terminator. 
- (void)getControlRoot(); - return nullptr; - } case Intrinsic::clear_cache: return TLI.getClearCacheBuiltinName(); case Intrinsic::donothing: @@ -5220,11 +5603,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::experimental_gc_statepoint: { - visitStatepoint(I); + LowerStatepoint(ImmutableStatepoint(&I)); return nullptr; } case Intrinsic::experimental_gc_result: { - visitGCResult(I); + visitGCResult(cast<GCResultInst>(I)); return nullptr; } case Intrinsic::experimental_gc_relocate: { @@ -5303,6 +5686,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, N); return nullptr; } + + case Intrinsic::experimental_deoptimize: + LowerDeoptimizeCall(&I); + return nullptr; } } @@ -5378,14 +5765,16 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, const BasicBlock *EHPadBB) { - PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); - FunctionType *FTy = cast<FunctionType>(PT->getElementType()); - Type *RetTy = FTy->getReturnType(); + auto &DL = DAG.getDataLayout(); + FunctionType *FTy = CS.getFunctionType(); + Type *RetTy = CS.getType(); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Args.reserve(CS.arg_size()); + const Value *SwiftErrorVal = nullptr; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { const Value *V = *i; @@ -5399,6 +5788,17 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Skip the first return-type Attribute to get to params. Entry.setAttributes(&CS, i - CS.arg_begin() + 1); + + // Use swifterror virtual register as input to the call. + if (Entry.isSwiftError && TLI.supportSwiftError()) { + SwiftErrorVal = V; + // We find the virtual register for the actual swifterror argument. + // Instead of using the Value, we use the virtual register. + Entry.Node = DAG.getRegister( + FuncInfo.findSwiftErrorVReg(FuncInfo.MBB, V), + EVT(TLI.getPointerTy(DL))); + } + Args.push_back(Entry); // If we have an explicit sret argument that is an Instruction, (i.e., it @@ -5413,13 +5813,32 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, isTailCall = false; TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(RetTy, FTy, Callee, std::move(Args), CS) - .setTailCall(isTailCall); + CLI.setDebugLoc(getCurSDLoc()) + .setChain(getRoot()) + .setCallee(RetTy, FTy, Callee, std::move(Args), CS) + .setTailCall(isTailCall) + .setConvergent(CS.isConvergent()); std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); - if (Result.first.getNode()) - setValue(CS.getInstruction(), Result.first); + if (Result.first.getNode()) { + const Instruction *Inst = CS.getInstruction(); + Result.first = lowerRangeToAssertZExt(DAG, *Inst, Result.first); + setValue(Inst, Result.first); + } + + // The last element of CLI.InVals has the SDValue for swifterror return. + // Here we copy it to a virtual register and update SwiftErrorMap for + // book-keeping. + if (SwiftErrorVal && TLI.supportSwiftError()) { + // Get the last element of InVals.
+ SDValue Src = CLI.InVals.back(); + const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); + unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); + SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); + // We update the virtual register for the actual swifterror argument. + FuncInfo.setSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); + DAG.setRoot(CopyNode); + } } /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the @@ -5449,7 +5868,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, PointerType::getUnqual(LoadTy)); if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr( - const_cast<Constant *>(LoadInput), *Builder.DL)) + const_cast<Constant *>(LoadInput), LoadTy, *Builder.DL)) return Builder.getValue(LoadCst); } @@ -5470,9 +5889,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, SDValue Ptr = Builder.getValue(PtrVal); SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr, MachinePointerInfo(PtrVal), - false /*volatile*/, - false /*nontemporal*/, - false /*isinvariant*/, 1 /* align=1 */); + /* Alignment = */ 1); if (!ConstantMemory) Builder.PendingLoads.push_back(LoadVal.getValue(1)); @@ -5516,7 +5933,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { return true; } - const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS), getValue(Size), @@ -5613,7 +6030,7 @@ bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { !I.getType()->isPointerTy()) return false; - const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Src), getValue(Char), getValue(Length), @@ -5641,7 +6058,7 @@ bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { !I.getType()->isPointerTy()) return false; - const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(), getValue(Arg0), getValue(Arg1), @@ -5670,7 +6087,7 @@ bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { !I.getType()->isIntegerTy()) return false; - const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Arg0), getValue(Arg1), @@ -5697,7 +6114,7 @@ bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy()) return false; - const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Arg0), MachinePointerInfo(Arg0)); @@ -5724,7 +6141,7 @@ bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { !I.getType()->isIntegerTy()) return false; - const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = 
TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Arg0), getValue(Arg1), @@ -5803,9 +6220,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { } // Check for well-known libc/libm calls. If the function is internal, it - // can't be a library call. + // can't be a library call. Don't do the check if marked as nobuiltin for + // some reason. LibFunc::Func Func; - if (!F->hasLocalLinkage() && F->hasName() && + if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() && LibInfo->getLibFunc(F->getName(), Func) && LibInfo->hasOptimizedCodeGen(Func)) { switch (Func) { @@ -5952,9 +6370,19 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { RenameFn, DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); - // Check if we can potentially perform a tail call. More detailed checking is - // be done within LowerCallTo, after more information about the call is known. - LowerCallTo(&I, Callee, I.isTailCall()); + // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't + // have to do anything here to lower funclet bundles. + assert(!I.hasOperandBundlesOtherThan( + {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && + "Cannot lower calls with arbitrary operand bundles!"); + + if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) + LowerCallSiteWithDeoptBundle(&I, Callee, nullptr); + else + // Check if we can potentially perform a tail call. More detailed checking + // is done within LowerCallTo, after more information about the call is + // known. + LowerCallTo(&I, Callee, I.isTailCall()); } namespace { @@ -6036,9 +6464,8 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; /// /// OpInfo describes the operand. /// -static void GetRegistersForValue(SelectionDAG &DAG, - const TargetLowering &TLI, - SDLoc DL, +static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, + const SDLoc &DL, SDISelAsmOperandInfo &OpInfo) { LLVMContext &Context = *DAG.getContext(); @@ -6301,8 +6728,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout())); Chain = DAG.getStore( Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), - false, false, 0); + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI)); OpInfo.CallOperand = StackSlot; } @@ -6349,6 +6775,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { ExtraInfo |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; + if (CS.isConvergent()) + ExtraInfo |= InlineAsm::Extra_IsConvergent; // Set the asm dialect. ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; @@ -6413,10 +6841,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Copy the output from the appropriate register. Find a register that // we can use. if (OpInfo.AssignedRegs.Regs.empty()) { - LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), - "couldn't allocate output register for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + emitInlineAsmError( + CS, "couldn't allocate output register for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } @@ -6469,10 +6896,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
if (OpInfo.isIndirect) { // This happens on gcc/testsuite/gcc.dg/pr8788-1.c - LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" - " don't know how to handle tied " - "indirect register inputs"); + emitInlineAsmError(CS, "inline asm not supported yet:" + " don't know how to handle tied " + "indirect register inputs"); return; } @@ -6486,10 +6912,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); else { - LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), - "inline asm error: This value" - " type register class is not natively supported!"); + emitInlineAsmError( + CS, "inline asm error: This value" + " type register class is not natively supported!"); return; } } @@ -6527,10 +6952,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); if (Ops.empty()) { - LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), - "invalid operand for inline asm constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + emitInlineAsmError(CS, "invalid operand for inline asm constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } @@ -6570,20 +6993,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // TODO: Support this. if (OpInfo.isIndirect) { - LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), - "Don't know how to handle indirect register inputs yet " - "for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + emitInlineAsmError( + CS, "Don't know how to handle indirect register inputs yet " + "for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty()) { - LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), - "couldn't allocate input reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + emitInlineAsmError(CS, "couldn't allocate input reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } @@ -6667,11 +7087,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Emit the non-flagged stores from the physregs. 
SmallVector<SDValue, 8> OutChains; for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) { - SDValue Val = DAG.getStore(Chain, getCurSDLoc(), - StoresToEmit[i].first, + SDValue Val = DAG.getStore(Chain, getCurSDLoc(), StoresToEmit[i].first, getValue(StoresToEmit[i].second), - MachinePointerInfo(StoresToEmit[i].second), - false, false, 0); + MachinePointerInfo(StoresToEmit[i].second)); OutChains.push_back(Val); } @@ -6681,6 +7099,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { DAG.setRoot(Chain); } +void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS, + const Twine &Message) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), Message); + + // Make sure we leave the DAG in a valid state + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + auto VT = TLI.getValueType(DAG.getDataLayout(), CS.getType()); + setValue(CS.getInstruction(), DAG.getUNDEF(VT)); +} + void SelectionDAGBuilder::visitVAStart(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(), MVT::Other, getRoot(), @@ -6715,16 +7144,49 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { DAG.getSrcValue(I.getArgOperand(1)))); } -/// \brief Lower an argument list according to the target calling convention. -/// -/// \return A tuple of <return-value, token-chain> +SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, + const Instruction &I, + SDValue Op) { + const MDNode *Range = I.getMetadata(LLVMContext::MD_range); + if (!Range) + return Op; + + Constant *Lo = cast<ConstantAsMetadata>(Range->getOperand(0))->getValue(); + if (!Lo->isNullValue()) + return Op; + + Constant *Hi = cast<ConstantAsMetadata>(Range->getOperand(1))->getValue(); + unsigned Bits = cast<ConstantInt>(Hi)->getValue().logBase2(); + + EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits); + + SDLoc SL = getCurSDLoc(); + + SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), + Op, DAG.getValueType(SmallVT)); + unsigned NumVals = Op.getNode()->getNumValues(); + if (NumVals == 1) + return ZExt; + + SmallVector<SDValue, 4> Ops; + + Ops.push_back(ZExt); + for (unsigned I = 1; I != NumVals; ++I) + Ops.push_back(Op.getValue(I)); + + return DAG.getMergeValues(Ops, SL); +} + +/// \brief Populate a CallLoweringInfo (into \p CLI) based on the properties of +/// the call being lowered. /// /// This is a helper for lowering intrinsics that follow a target calling /// convention or require stack pointer adjustment. Only a subset of the /// intrinsic's operands need to participate in the calling convention.
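Note how lowerRangeToAssertZExt above only fires when the !range lower bound is zero, and derives the asserted width as the floor log2 of the exclusive upper bound (hence the "nearest floor power of two" wording in the header). A minimal sketch of that width computation, assuming LLVM's APInt (the helper name is illustrative, not from the patch):

    #include "llvm/ADT/APInt.h"

    // For !range [0, Hi) the result is wrapped in AssertZext with an integer
    // value type of logBase2(Hi) bits; this is exact when Hi is a power of
    // two, e.g. [0, 256) asserts that the value fits in i8.
    static unsigned assertedWidth(const llvm::APInt &Hi) {
      return Hi.logBase2(); // assertedWidth(APInt(32, 256)) == 8
    }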
-std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands( - ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, - Type *ReturnTy, const BasicBlock *EHPadBB, bool IsPatchPoint) { +void SelectionDAGBuilder::populateCallLoweringInfo( + TargetLowering::CallLoweringInfo &CLI, ImmutableCallSite CS, + unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy, + bool IsPatchPoint) { TargetLowering::ArgListTy Args; Args.reserve(NumArgs); @@ -6743,12 +7205,11 @@ std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands( Args.push_back(Entry); } - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs) - .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); - - return lowerInvokable(CLI, EHPadBB); + CLI.setDebugLoc(getCurSDLoc()) + .setChain(getRoot()) + .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args)) + .setDiscardResult(CS->use_empty()) + .setIsPatchPoint(IsPatchPoint); } /// \brief Add a stack map intrinsic call's live variable operands to a stackmap @@ -6769,7 +7230,7 @@ std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands( /// only available in a register, then the runtime would need to trap when /// execution reaches the StackMap in order to read the alloca's location. static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, - SDLoc DL, SmallVectorImpl<SDValue> &Ops, + const SDLoc &DL, SmallVectorImpl<SDValue> &Ops, SelectionDAGBuilder &Builder) { for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) { SDValue OpVal = Builder.getValue(CS.getArgument(i)); @@ -6889,8 +7350,11 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; Type *ReturnTy = IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); - std::pair<SDValue, SDValue> Result = lowerCallOperands( - CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, EHPadBB, true); + + TargetLowering::CallLoweringInfo CLI(DAG); + populateCallLoweringInfo(CLI, CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, + true); + std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); SDNode *CallEnd = Result.second.getNode(); if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) @@ -7057,6 +7521,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.isNest = false; Entry.isByVal = false; Entry.isReturned = false; + Entry.isSwiftSelf = false; + Entry.isSwiftError = false; Entry.Alignment = Align; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); @@ -7085,10 +7551,23 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } } + // We push in swifterror return as the last element of CLI.Ins. + ArgListTy &Args = CLI.getArgs(); + if (supportSwiftError()) { + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + if (Args[i].isSwiftError) { + ISD::InputArg MyFlags; + MyFlags.VT = getPointerTy(DL); + MyFlags.ArgVT = EVT(getPointerTy(DL)); + MyFlags.Flags.setSwiftError(); + CLI.Ins.push_back(MyFlags); + } + } + } + // Handle all of the outgoing arguments. 
CLI.Outs.clear(); CLI.OutVals.clear(); - ArgListTy &Args = CLI.getArgs(); for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs); @@ -7114,6 +7593,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setInReg(); if (Args[i].isSRet) Flags.setSRet(); + if (Args[i].isSwiftSelf) + Flags.setSwiftSelf(); + if (Args[i].isSwiftError) + Flags.setSwiftError(); if (Args[i].isByVal) Flags.setByVal(); if (Args[i].isInAlloca) { @@ -7202,6 +7685,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { SmallVector<SDValue, 4> InVals; CLI.Chain = LowerCall(CLI, InVals); + // Update CLI.InVals to use outside of this function. + CLI.InVals = InVals; + // Verify that the target's LowerCall behaved as expected. assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other && "LowerCall didn't return a valid chain!"); @@ -7219,12 +7705,13 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { return std::make_pair(SDValue(), SDValue()); } - DEBUG(for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) { - assert(InVals[i].getNode() && - "LowerCall emitted a null value!"); - assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() && - "LowerCall emitted a value with the wrong type!"); - }); +#ifndef NDEBUG + for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) { + assert(InVals[i].getNode() && "LowerCall emitted a null value!"); + assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() && + "LowerCall emitted a value with the wrong type!"); + } +#endif SmallVector<SDValue, 4> ReturnValues; if (!CanLowerReturn) { @@ -7254,7 +7741,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), DemoteStackIdx, Offsets[i]), - false, false, false, 1); + /* Alignment = */ 1); ReturnValues[i] = L; Chains[i] = L.getValue(1); } @@ -7263,7 +7750,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } else { // Collect the legal value parts into potentially illegal values // that correspond to the original function's return values. 
- ISD::NodeType AssertOp = ISD::DELETED_NODE; + Optional<ISD::NodeType> AssertOp; if (CLI.RetSExt) AssertOp = ISD::AssertSext; else if (CLI.RetZExt) @@ -7295,8 +7782,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { void TargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { - SDValue Res = LowerOperation(SDValue(N, 0), DAG); - if (Res.getNode()) + if (SDValue Res = LowerOperation(SDValue(N, 0), DAG)) Results.push_back(Res); } @@ -7394,6 +7880,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setInReg(); if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) Flags.setSRet(); + if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftSelf)) + Flags.setSwiftSelf(); + if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftError)) + Flags.setSwiftError(); if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) Flags.setByVal(); if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) { @@ -7483,7 +7973,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { PointerType::getUnqual(F.getReturnType()), ValueVTs); MVT VT = ValueVTs[0].getSimpleVT(); MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); - ISD::NodeType AssertOp = ISD::DELETED_NODE; + Optional<ISD::NodeType> AssertOp = None; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, nullptr, AssertOp); @@ -7524,7 +8014,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT); if (!I->use_empty()) { - ISD::NodeType AssertOp = ISD::DELETED_NODE; + Optional<ISD::NodeType> AssertOp; if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) AssertOp = ISD::AssertSext; else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) @@ -7559,6 +8049,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) { FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); } + // Update SwiftErrorMap. + if (Res.getOpcode() == ISD::CopyFromReg && TLI->supportSwiftError() && + F.getAttributes().hasAttribute(Idx, Attribute::SwiftError)) { + unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + FuncInfo->SwiftErrorMap[FuncInfo->MBB][0] = Reg; + } + // If this argument is live outside of the entry block, insert a copy from // wherever we got it to the vreg that other BB's will reference it as. 
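The AssertOp hunks above swap the ISD::DELETED_NODE sentinel for llvm::Optional, so "no assert node requested" is expressed in the type rather than by a magic opcode. The shape of the cleanup, as a sketch with illustrative names (not from the patch):

    #include "llvm/ADT/Optional.h"

    enum class AssertKind { Sext, Zext };

    // Before: the parameter was a plain enum and a reserved enumerator
    // (ISD::DELETED_NODE) stood in for "none".
    // After: the empty state is explicit and cannot collide with a real
    // enumerator.
    void emitAssert(llvm::Optional<AssertKind> AssertOp) {
      if (!AssertOp)
        return; // nothing requested; no sentinel comparison needed
      switch (*AssertOp) {
      case AssertKind::Sext: /* emit ISD::AssertSext here */ break;
      case AssertKind::Zext: /* emit ISD::AssertZext here */ break;
      }
    }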
if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { @@ -7656,7 +8154,8 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { EVT VT = ValueVTs[vti]; unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); for (unsigned i = 0, e = NumRegisters; i != e; ++i) - FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); + FuncInfo.PHINodesToUpdate.push_back( + std::make_pair(&*MBBI++, Reg + i)); Reg += NumRegisters; } } @@ -7708,7 +8207,8 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, unsigned *TotalCases, unsigned First, - unsigned Last) { + unsigned Last, + unsigned Density) { assert(Last >= First); assert(TotalCases[Last] >= TotalCases[First]); @@ -7729,10 +8229,15 @@ bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, assert(NumCases < UINT64_MAX / 100); assert(Range >= NumCases); - return NumCases * 100 >= Range * MinJumpTableDensity; + return NumCases * 100 >= Range * Density; } -static inline bool areJTsAllowed(const TargetLowering &TLI) { +static inline bool areJTsAllowed(const TargetLowering &TLI, + const SwitchInst *SI) { + const Function *Fn = SI->getParent()->getParent(); + if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true") + return false; + return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); } @@ -7826,7 +8331,7 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, #endif const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!areJTsAllowed(TLI)) + if (!areJTsAllowed(TLI, SI)) return; const int64_t N = Clusters.size(); @@ -7843,7 +8348,11 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, TotalCases[i] += TotalCases[i - 1]; } - if (N >= MinJumpTableSize && isDense(Clusters, &TotalCases[0], 0, N - 1)) { + unsigned MinDensity = JumpTableDensity; + if (DefaultMBB->getParent()->getFunction()->optForSize()) + MinDensity = OptsizeJumpTableDensity; + if (N >= MinJumpTableSize + && isDense(Clusters, &TotalCases[0], 0, N - 1, MinDensity)) { // Cheap case: the whole range might be suitable for jump table. CaseCluster JTCluster; if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { @@ -7888,7 +8397,7 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, // Search for a solution that results in fewer partitions. for (int64_t j = N - 1; j > i; j--) { // Try building a partition from Clusters[i..j]. - if (isDense(Clusters, &TotalCases[0], i, j)) { + if (isDense(Clusters, &TotalCases[0], i, j, MinDensity)) { unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); bool IsTable = j - i + 1 >= MinJumpTableSize; unsigned Tables = IsTable + (j == N - 1 ? 
0 : NumTables[j + 1]); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 8fb85ff6ecc7e..b9888ae87639e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -18,14 +18,14 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/CallSite.h" -#include "llvm/IR/Statepoint.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Statepoint.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLowering.h" +#include <utility> #include <vector> namespace llvm { @@ -101,8 +101,8 @@ class SelectionDAGBuilder { unsigned SDNodeOrder; public: DanglingDebugInfo() : DI(nullptr), dl(DebugLoc()), SDNodeOrder(0) { } - DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) : - DI(di), dl(DL), SDNodeOrder(SDNO) { } + DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) + : DI(di), dl(std::move(DL)), SDNodeOrder(SDNO) {} const DbgValueInst* getDI() { return DI; } DebugLoc getdl() { return dl; } unsigned getSDNodeOrder() { return SDNodeOrder; } @@ -260,8 +260,9 @@ private: }; struct JumpTableHeader { JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H, - bool E = false): - First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {} + bool E = false) + : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H), + Emitted(E) {} APInt First; APInt Last; const Value *SValue; @@ -286,9 +287,9 @@ private: BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D, BitTestInfo C, BranchProbability Pr) - : First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), - ContiguousRange(CR), Parent(P), Default(D), Cases(std::move(C)), - Prob(Pr) {} + : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg), + RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D), + Cases(std::move(C)), Prob(Pr) {} APInt First; APInt Range; const Value *SValue; @@ -303,12 +304,9 @@ private: BranchProbability DefaultProb; }; - /// Minimum jump table density, in percent. - enum { MinJumpTableDensity = 40 }; - /// Check whether a range of clusters is dense enough for a jump table. bool isDense(const CaseClusterVector &Clusters, unsigned *TotalCases, - unsigned First, unsigned Last); + unsigned First, unsigned Last, unsigned MinDensity); /// Build a jump table cluster from Clusters[First..Last]. Returns false if it /// decides it's not a good idea. @@ -457,7 +455,14 @@ private: /// /// c. After we finish selecting the basic block, in FinishBasicBlock if /// the StackProtectorDescriptor attached to the SelectionDAGBuilder is - /// initialized, we first find a splice point in the parent basic block + /// initialized, we produce the validation code with one of these + /// techniques: + /// 1) with a call to a guard check function + /// 2) with inlined instrumentation + /// + /// 1) We insert a call to the check function before the terminator. + /// + /// 2) We first find a splice point in the parent basic block /// before the terminator and then splice the terminator of said basic /// block into the success basic block. 
Then we code-gen a new tail for /// the parent basic block consisting of the two loads, the comparison, @@ -467,29 +472,31 @@ private: /// the same function, use the same failure basic block). class StackProtectorDescriptor { public: - StackProtectorDescriptor() : ParentMBB(nullptr), SuccessMBB(nullptr), - FailureMBB(nullptr), Guard(nullptr), - GuardReg(0) { } + StackProtectorDescriptor() + : ParentMBB(nullptr), SuccessMBB(nullptr), FailureMBB(nullptr) {} /// Returns true if all fields of the stack protector descriptor are /// initialized implying that we should/are ready to emit a stack protector. bool shouldEmitStackProtector() const { - return ParentMBB && SuccessMBB && FailureMBB && Guard; + return ParentMBB && SuccessMBB && FailureMBB; + } + + bool shouldEmitFunctionBasedCheckStackProtector() const { + return ParentMBB && !SuccessMBB && !FailureMBB; } /// Initialize the stack protector descriptor structure for a new basic /// block. - void initialize(const BasicBlock *BB, - MachineBasicBlock *MBB, - const CallInst &StackProtCheckCall) { + void initialize(const BasicBlock *BB, MachineBasicBlock *MBB, + bool FunctionBasedInstrumentation) { // Make sure we are not initialized yet. assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is " "already initialized!"); ParentMBB = MBB; - SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true); - FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB); - if (!Guard) - Guard = StackProtCheckCall.getArgOperand(0); + if (!FunctionBasedInstrumentation) { + SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true); + FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB); + } } /// Reset state that changes when we handle different basic blocks. @@ -518,17 +525,11 @@ private: /// always the same. void resetPerFunctionState() { FailureMBB = nullptr; - Guard = nullptr; - GuardReg = 0; } MachineBasicBlock *getParentMBB() { return ParentMBB; } MachineBasicBlock *getSuccessMBB() { return SuccessMBB; } MachineBasicBlock *getFailureMBB() { return FailureMBB; } - const Value *getGuard() { return Guard; } - - unsigned getGuardReg() const { return GuardReg; } - void setGuardReg(unsigned R) { GuardReg = R; } private: /// The basic block for which we are generating the stack protector. @@ -548,13 +549,6 @@ private: /// contain a call to __stack_chk_fail(). MachineBasicBlock *FailureMBB; - /// The guard variable which we will compare against the stored value in the - /// stack protector stack slot. - const Value *Guard; - - /// The virtual register holding the stack guard value. - unsigned GuardReg; - /// Add a successor machine basic block to ParentMBB. If the successor mbb /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic /// block will be created. Assign a large weight if IsLikely is true. @@ -708,28 +702,88 @@ public: void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, const BasicBlock *EHPadBB = nullptr); - std::pair<SDValue, SDValue> lowerCallOperands( - ImmutableCallSite CS, - unsigned ArgIdx, - unsigned NumArgs, - SDValue Callee, - Type *ReturnTy, - const BasicBlock *EHPadBB = nullptr, - bool IsPatchPoint = false); + // Lower range metadata from 0 to N to assert zext to an integer of nearest + // floor power of two. 
+ SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I, + SDValue Op); + + void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI, + ImmutableCallSite CS, unsigned ArgIdx, + unsigned NumArgs, SDValue Callee, + Type *ReturnTy, bool IsPatchPoint); + + std::pair<SDValue, SDValue> + lowerInvokable(TargetLowering::CallLoweringInfo &CLI, + const BasicBlock *EHPadBB = nullptr); /// UpdateSplitBlock - When an MBB was split during scheduling, update the /// references that need to refer to the last resulting block. void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); + /// Describes a gc.statepoint or a gc.statepoint like thing for the purposes + /// of lowering into a STATEPOINT node. + struct StatepointLoweringInfo { + /// Bases[i] is the base pointer for Ptrs[i]. Together they denote the set + /// of gc pointers this STATEPOINT has to relocate. + SmallVector<const Value *, 16> Bases; + SmallVector<const Value *, 16> Ptrs; + + /// The set of gc.relocate calls associated with this gc.statepoint. + SmallVector<const GCRelocateInst *, 16> GCRelocates; + + /// The full list of gc arguments to the gc.statepoint being lowered. + ArrayRef<const Use> GCArgs; + + /// The gc.statepoint instruction. + const Instruction *StatepointInstr = nullptr; + + /// The list of gc transition arguments present in the gc.statepoint being + /// lowered. + ArrayRef<const Use> GCTransitionArgs; + + /// The ID that the resulting STATEPOINT instruction has to report. + unsigned ID = -1; + + /// Information regarding the underlying call instruction. + TargetLowering::CallLoweringInfo CLI; + + /// The deoptimization state associated with this gc.statepoint call, if + /// any. + ArrayRef<const Use> DeoptState; + + /// Flags associated with the meta arguments being lowered. + uint64_t StatepointFlags = -1; + + /// The number of patchable bytes the call needs to get lowered into. + unsigned NumPatchBytes = -1; + + /// The exception handling unwind destination, in case this represents an + /// invoke of gc.statepoint. + const BasicBlock *EHPadBB = nullptr; + + explicit StatepointLoweringInfo(SelectionDAG &DAG) : CLI(DAG) {} + }; + + /// Lower \p SLI into a STATEPOINT instruction. + SDValue LowerAsSTATEPOINT(StatepointLoweringInfo &SLI); + // This function is responsible for the whole statepoint lowering process. // It uniformly handles invoke and call statepoints. void LowerStatepoint(ImmutableStatepoint Statepoint, const BasicBlock *EHPadBB = nullptr); -private: - std::pair<SDValue, SDValue> - lowerInvokable(TargetLowering::CallLoweringInfo &CLI, - const BasicBlock *EHPadBB = nullptr); + void LowerCallSiteWithDeoptBundle(ImmutableCallSite CS, SDValue Callee, + const BasicBlock *EHPadBB); + + void LowerDeoptimizeCall(const CallInst *CI); + void LowerDeoptimizingReturn(); + + void LowerCallSiteWithDeoptBundleImpl(ImmutableCallSite CS, SDValue Callee, + const BasicBlock *EHPadBB, + bool VarArgDisallowed, + bool ForceVoidReturnTy); + +private: // Terminator instructions. 
void visitRet(const ReturnInst &I); void visitBr(const BranchInst &I); @@ -840,6 +894,8 @@ private: bool visitBinaryFloatCall(const CallInst &I, unsigned Opcode); void visitAtomicLoad(const LoadInst &I); void visitAtomicStore(const StoreInst &I); + void visitLoadFromSwiftError(const LoadInst &I); + void visitStoreToSwiftError(const StoreInst &I); void visitInlineAsm(ImmutableCallSite CS); const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); @@ -853,10 +909,9 @@ private: void visitPatchpoint(ImmutableCallSite CS, const BasicBlock *EHPadBB = nullptr); - // These three are implemented in StatepointLowering.cpp - void visitStatepoint(const CallInst &I); + // These two are implemented in StatepointLowering.cpp void visitGCRelocate(const GCRelocateInst &I); - void visitGCResult(const CallInst &I); + void visitGCResult(const GCResultInst &I); void visitUserOp1(const Instruction &I) { llvm_unreachable("UserOp1 should not exist at instruction selection time!"); @@ -870,6 +925,8 @@ private: void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); + void emitInlineAsmError(ImmutableCallSite CS, const Twine &Message); + /// EmitFuncArgumentDbgValue - If V is an function argument then create /// corresponding DBG_VALUE machine instruction for it now. At the end of /// instruction selection, they will be inserted to the entry BB. @@ -937,8 +994,7 @@ struct RegsForValue { /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, - SDLoc dl, - SDValue &Chain, SDValue *Flag, + const SDLoc &dl, SDValue &Chain, SDValue *Flag, const Value *V = nullptr) const; /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the specified @@ -946,18 +1002,16 @@ struct RegsForValue { /// as the input and updates them for the output Chain/Flag. If the Flag /// pointer is nullptr, no flag is used. If V is not nullptr, then it is used /// in printing better diagnostic messages on error. - void - getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, - SDValue *Flag, const Value *V = nullptr, - ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const; + void getCopyToRegs(SDValue Val, SelectionDAG &DAG, const SDLoc &dl, + SDValue &Chain, SDValue *Flag, const Value *V = nullptr, + ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const; /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker, matching input operand index /// (if applicable), and includes the number of values added into it. 
- void AddInlineAsmOperands(unsigned Kind, - bool HasMatching, unsigned MatchingIdx, SDLoc dl, - SelectionDAG &DAG, - std::vector<SDValue> &Ops) const; + void AddInlineAsmOperands(unsigned Kind, bool HasMatching, + unsigned MatchingIdx, const SDLoc &dl, + SelectionDAG &DAG, std::vector<SDValue> &Ops) const; }; } // end namespace llvm diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index a1c6c4c1dd631..401da059dedca 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -101,7 +101,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; - case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER"; + case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER"; case ISD::READ_REGISTER: return "READ_REGISTER"; case ISD::WRITE_REGISTER: return "WRITE_REGISTER"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; @@ -202,6 +202,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FREM: return "frem"; case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; + case ISD::FCANONICALIZE: return "fcanonicalize"; case ISD::FPOW: return "fpow"; case ISD::SMIN: return "smin"; case ISD::SMAX: return "smax"; @@ -378,7 +379,7 @@ static Printable PrintNodeId(const SDNode &Node) { }); } -void SDNode::dump() const { dump(nullptr); } +LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); } void SDNode::dump(const SelectionDAG *G) const { print(dbgs(), G); dbgs() << '\n'; @@ -590,7 +591,7 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { N->dump(G); } -void SelectionDAG::dump() const { +LLVM_DUMP_METHOD void SelectionDAG::dump() const { dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n"; for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); @@ -630,7 +631,7 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G, } } -typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; +typedef SmallPtrSet<const SDNode *, 32> VisitedSDNodeSet; static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, const SelectionDAG *G, VisitedSDNodeSet &once) { if (!once.insert(N).second) // If we've been here before, return now. 
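A mechanical change threaded through this whole patch, visible again in the header just above, is passing SDLoc by const reference instead of by value, so the DebugLoc and IR ordering number an SDLoc carries are not copied at every call on the lowering path. The pattern, as a minimal sketch (illustrative helper, not from the patch):

    #include "llvm/CodeGen/SelectionDAG.h"

    // Before: static SDValue getZero(SelectionDAG &DAG, SDLoc dl);
    static llvm::SDValue getZero(llvm::SelectionDAG &DAG,
                                 const llvm::SDLoc &dl) {
      return DAG.getConstant(0, dl, llvm::MVT::i32);
    }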
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index c075da4738ad6..1d61657194c5c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "ScheduleDAGSDNodes.h" #include "SelectionDAGBuilder.h" #include "llvm/ADT/PostOrderIterator.h" @@ -21,10 +21,10 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -32,8 +32,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" @@ -59,6 +59,7 @@ #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <algorithm> + using namespace llvm; #define DEBUG_TYPE "isel" @@ -317,7 +318,7 @@ namespace llvm { "Unknown sched type!"); return createILPListDAGScheduler(IS, OptLevel); } -} +} // end namespace llvm // EmitInstrWithCustomInserter - This method should be implemented by targets // that mark instructions with the 'usesCustomInserter' flag. These @@ -329,7 +330,7 @@ namespace llvm { // are modified, the method should insert pairs of <OldSucc, NewSucc> into the // DenseMap. MachineBasicBlock * -TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, +TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const { #ifndef NDEBUG dbgs() << "If a target marks an instruction with " @@ -339,9 +340,9 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, llvm_unreachable(nullptr); } -void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, +void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const { - assert(!MI->hasPostISelHook() && + assert(!MI.hasPostISelHook() && "If a target marks an instruction with 'hasPostISelHook', " "it must implement TargetLowering::AdjustInstrPostInstrSelection!"); } @@ -376,6 +377,8 @@ SelectionDAGISel::~SelectionDAGISel() { void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<GCModuleInfo>(); + AU.addRequired<StackProtector>(); + AU.addPreserved<StackProtector>(); AU.addPreserved<GCModuleInfo>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); if (UseMBPI && OptLevel != CodeGenOpt::None) @@ -440,7 +443,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TM.resetTargetOptions(Fn); // Reset OptLevel to None for optnone functions. 
CodeGenOpt::Level NewOptLevel = OptLevel; - if (Fn.hasFnAttribute(Attribute::OptimizeNone)) + if (OptLevel != CodeGenOpt::None && skipFunction(Fn)) NewOptLevel = CodeGenOpt::None; OptLevelChanger OLC(*this, NewOptLevel); @@ -468,11 +471,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MF->setHasInlineAsm(false); FuncInfo->SplitCSR = false; - SmallVector<MachineBasicBlock*, 4> Returns; // We split CSR if the target supports it for the given function // and the function has only return exits. - if (TLI->supportSplitCSR(MF)) { + if (OptLevel != CodeGenOpt::None && TLI->supportSplitCSR(MF)) { FuncInfo->SplitCSR = true; // Collect all the return blocks. @@ -481,12 +483,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { continue; const TerminatorInst *Term = BB.getTerminator(); - if (isa<UnreachableInst>(Term)) + if (isa<UnreachableInst>(Term) || isa<ReturnInst>(Term)) continue; - if (isa<ReturnInst>(Term)) { - Returns.push_back(FuncInfo->MBBMap[&BB]); - continue; - } // Bail out if the exit block is not Return nor Unreachable. FuncInfo->SplitCSR = false; @@ -508,8 +506,21 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII); // Insert copies in the entry block and the return blocks. - if (FuncInfo->SplitCSR) + if (FuncInfo->SplitCSR) { + SmallVector<MachineBasicBlock*, 4> Returns; + // Collect all the return blocks. + for (MachineBasicBlock &MBB : mf) { + if (!MBB.succ_empty()) + continue; + + MachineBasicBlock::iterator Term = MBB.getFirstTerminator(); + if (Term != MBB.end() && Term->isReturn()) { + Returns.push_back(&MBB); + continue; + } + } TLI->insertCopiesSplitCSR(EntryMBB, Returns); + } DenseMap<unsigned, unsigned> LiveInMap; if (!FuncInfo->ArgDbgValues.empty()) @@ -669,7 +680,7 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, } void SelectionDAGISel::ComputeLiveOutVRegInfo() { - SmallPtrSet<SDNode*, 128> VisitedNodes; + SmallPtrSet<SDNode*, 16> VisitedNodes; SmallVector<SDNode*, 128> Worklist; Worklist.push_back(CurDAG->getRoot().getNode()); @@ -854,7 +865,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Scheduler->Run(CurDAG, FuncInfo->MBB); } - if (ViewSUnitDAGs && MatchFilterBB) Scheduler->viewGraph(); + if (ViewSUnitDAGs && MatchFilterBB) + Scheduler->viewGraph(); // Emit machine code to BB. This can change 'BB' to the last block being // inserted into. @@ -937,23 +949,7 @@ void SelectionDAGISel::DoInstructionSelection() { if (Node->use_empty()) continue; - SDNode *ResNode = Select(Node); - - // FIXME: This is pretty gross. 'Select' should be changed to not return - // anything at all and this code should be nuked with a tactical strike. - - // If node should not be replaced, continue with the next one. - if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE) - continue; - // Replace node. - if (ResNode) { - ReplaceUses(Node, ResNode); - } - - // If after the replacement this node is not used any more, - // remove this dead node. - if (Node->use_empty()) // Don't delete EntryToken, etc. - CurDAG->RemoveDeadNode(Node); + Select(Node); } CurDAG->setRoot(Dummy.getValue()); @@ -1147,7 +1143,125 @@ static void collectFailStats(const Instruction *I) { case Instruction::LandingPad: NumFastIselFailLandingPad++; return; } } -#endif +#endif // NDEBUG + +/// Set up SwiftErrorVals by going through the function. If the function has +/// a swifterror argument, it will be the first entry.
+static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI, + FunctionLoweringInfo *FuncInfo) { + if (!TLI->supportSwiftError()) + return; + + FuncInfo->SwiftErrorVals.clear(); + FuncInfo->SwiftErrorMap.clear(); + FuncInfo->SwiftErrorWorklist.clear(); + + // Check if function has a swifterror argument. + for (Function::const_arg_iterator AI = Fn.arg_begin(), AE = Fn.arg_end(); + AI != AE; ++AI) + if (AI->hasSwiftErrorAttr()) + FuncInfo->SwiftErrorVals.push_back(&*AI); + + for (const auto &LLVMBB : Fn) + for (const auto &Inst : LLVMBB) { + if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&Inst)) + if (Alloca->isSwiftError()) + FuncInfo->SwiftErrorVals.push_back(Alloca); + } +} + +/// For each basic block, merge incoming swifterror values or simply propagate +/// them. The merged results will be saved in SwiftErrorMap. For predecessors +/// that are not yet visited, we create virtual registers to hold the swifterror +/// values and save them in SwiftErrorWorklist. +static void mergeIncomingSwiftErrors(FunctionLoweringInfo *FuncInfo, + const TargetLowering *TLI, + const TargetInstrInfo *TII, + const BasicBlock *LLVMBB, + SelectionDAGBuilder *SDB) { + if (!TLI->supportSwiftError()) + return; + + // We should only do this when we have a swifterror parameter or a + // swifterror alloca. + if (FuncInfo->SwiftErrorVals.empty()) + return; + + // At the beginning of a basic block, insert PHI nodes or get the virtual + // register from the only predecessor, and update SwiftErrorMap; if one + // of the predecessors is not visited, update SwiftErrorWorklist. + // At the end of a basic block, if a block is in SwiftErrorWorklist, insert + // a copy to sync up the virtual register assignment. + + // Always create a virtual register for each swifterror value in the entry + // block. + auto &DL = SDB->DAG.getDataLayout(); + const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); + if (pred_begin(LLVMBB) == pred_end(LLVMBB)) { + for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) { + unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC); + // Assign Undef to Vreg. We construct MI directly to make sure it works + // with FastISel. + BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), VReg); + FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg); + } + return; + } + + if (auto *UniquePred = LLVMBB->getUniquePredecessor()) { + auto *UniquePredMBB = FuncInfo->MBBMap[UniquePred]; + if (!FuncInfo->SwiftErrorMap.count(UniquePredMBB)) { + // Update SwiftErrorWorklist with a new virtual register. + for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) { + unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC); + FuncInfo->SwiftErrorWorklist[UniquePredMBB].push_back(VReg); + // Propagate the information from the single predecessor. + FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg); + } + return; + } + // Propagate the information from the single predecessor. + FuncInfo->SwiftErrorMap[FuncInfo->MBB] = + FuncInfo->SwiftErrorMap[UniquePredMBB]; + return; + } + + // For the case of multiple predecessors, update SwiftErrorWorklist. + // Handle the case where we have two or more predecessors being the same.
+ for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB); + PI != PE; ++PI) { + auto *PredMBB = FuncInfo->MBBMap[*PI]; + if (!FuncInfo->SwiftErrorMap.count(PredMBB) && + !FuncInfo->SwiftErrorWorklist.count(PredMBB)) { + for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) { + unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC); + // When we actually visit the basic block PredMBB, we will materialize + // the virtual register assignment in copySwiftErrorsToFinalVRegs. + FuncInfo->SwiftErrorWorklist[PredMBB].push_back(VReg); + } + } + } + + // For the case of multiple predecessors, create a virtual register for + // each swifterror value and generate Phi node. + for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) { + unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC); + FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg); + + MachineInstrBuilder SwiftErrorPHI = BuildMI(*FuncInfo->MBB, + FuncInfo->MBB->begin(), SDB->getCurDebugLoc(), + TII->get(TargetOpcode::PHI), VReg); + for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB); + PI != PE; ++PI) { + auto *PredMBB = FuncInfo->MBBMap[*PI]; + unsigned SwiftErrorReg = FuncInfo->SwiftErrorMap.count(PredMBB) ? + FuncInfo->SwiftErrorMap[PredMBB][I] : + FuncInfo->SwiftErrorWorklist[PredMBB][I]; + SwiftErrorPHI.addReg(SwiftErrorReg) + .addMBB(PredMBB); + } + } +} void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. @@ -1155,6 +1269,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (TM.Options.EnableFastISel) FastIS = TLI->createFastISel(*FuncInfo, LibInfo); + setupSwiftErrorVals(Fn, TLI, FuncInfo); + // Iterate over all basic blocks in the function. ReversePostOrderTraversal<const Function*> RPOT(&Fn); for (ReversePostOrderTraversal<const Function*>::rpo_iterator @@ -1193,6 +1309,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (!FuncInfo->MBB) continue; // Some blocks like catchpads have no code or MBB. FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); + mergeIncomingSwiftErrors(FuncInfo, TLI, TII, LLVMBB, SDB); // Setup an EH landing-pad block. FuncInfo->ExceptionPointerVirtReg = 0; @@ -1228,7 +1345,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // where they are, so we can be sure to emit subsequent instructions // after them. if (FuncInfo->InsertPt != FuncInfo->MBB->begin()) - FastIS->setLastLocalValue(std::prev(FuncInfo->InsertPt)); + FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt)); else FastIS->setLastLocalValue(nullptr); } @@ -1345,6 +1462,12 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { LowerArguments(Fn); } } + if (getAnalysis<StackProtector>().shouldEmitSDCheck(*LLVMBB)) { + bool FunctionBasedInstrumentation = + TLI->getSSPStackGuardCheck(*Fn.getParent()); + SDB->SPDescriptor.initialize(LLVMBB, FuncInfo->MBBMap[LLVMBB], + FunctionBasedInstrumentation); + } if (Begin != BI) ++NumDAGBlocks; @@ -1376,15 +1499,15 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { /// terminator instructors so we can satisfy ABI constraints. A partial /// terminator sequence is an improper subset of a terminator sequence (i.e. it /// may be the whole terminator sequence). 
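
The merge logic above boils down to three cases per incoming swifterror value: the entry block defines a fresh vreg via IMPLICIT_DEF, a unique predecessor either hands its vreg over or has one reserved for it on SwiftErrorWorklist, and multiple predecessors are joined with a PHI. A self-contained toy model of that dispatch (hypothetical Block/VRegMap/Worklist names, not the LLVM API):

    #include <cstdio>
    #include <map>
    #include <vector>

    // Toy model: each block picks one vreg per swifterror value, mirroring
    // the three cases in mergeIncomingSwiftErrors.
    struct Block { std::vector<Block *> Preds; };
    static int NextVReg = 0;
    static std::map<Block *, int> VRegMap;   // stands in for SwiftErrorMap
    static std::map<Block *, int> Worklist;  // stands in for SwiftErrorWorklist

    int chooseVReg(Block *B) {
      if (B->Preds.empty())                  // entry block: fresh def (IMPLICIT_DEF)
        return VRegMap[B] = NextVReg++;
      if (B->Preds.size() == 1) {            // unique predecessor
        Block *P = B->Preds[0];
        if (VRegMap.count(P))                // already lowered: just propagate
          return VRegMap[B] = VRegMap[P];
        Worklist[P] = NextVReg;              // not lowered yet: reserve a vreg that
        return VRegMap[B] = NextVReg++;      // the predecessor will copy into later
      }
      // Multiple predecessors: reserve vregs for not-yet-visited preds, then
      // merge one vreg per incoming edge with a PHI.
      for (Block *P : B->Preds)
        if (!VRegMap.count(P) && !Worklist.count(P))
          Worklist[P] = NextVReg++;
      return VRegMap[B] = NextVReg++;        // result vreg defined by the PHI
    }

    int main() {
      Block Entry, Then, Else, Join;
      Then.Preds = {&Entry}; Else.Preds = {&Entry}; Join.Preds = {&Then, &Else};
      for (Block *B : {&Entry, &Then, &Else, &Join})
        std::printf("vreg %d\n", chooseVReg(B));
    }
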
-static bool MIIsInTerminatorSequence(const MachineInstr *MI) { +static bool MIIsInTerminatorSequence(const MachineInstr &MI) { // If we do not have a copy or an implicit def, we return true if and only if // MI is a debug value. - if (!MI->isCopy() && !MI->isImplicitDef()) + if (!MI.isCopy() && !MI.isImplicitDef()) // Sometimes DBG_VALUE MI sneak in between the copies from the vregs to the // physical registers if there is debug info associated with the terminator // of our mbb. We want to include said debug info in our terminator // sequence, so we return true in that case. - return MI->isDebugValue(); + return MI.isDebugValue(); // We have left the terminator sequence if we are not doing one of the // following: @@ -1394,18 +1517,18 @@ static bool MIIsInTerminatorSequence(const MachineInstr *MI) { // 3. Defining a register via an implicit def. // OPI should always be a register definition... - MachineInstr::const_mop_iterator OPI = MI->operands_begin(); + MachineInstr::const_mop_iterator OPI = MI.operands_begin(); if (!OPI->isReg() || !OPI->isDef()) return false; // Defining any register via an implicit def is always ok. - if (MI->isImplicitDef()) + if (MI.isImplicitDef()) return true; // Grab the copy source... MachineInstr::const_mop_iterator OPI2 = OPI; ++OPI2; - assert(OPI2 != MI->operands_end() + assert(OPI2 != MI.operands_end() && "Should have a copy implying we should have 2 arguments."); // Make sure that the copy dest is not a vreg when the copy source is a @@ -1432,7 +1555,7 @@ static bool MIIsInTerminatorSequence(const MachineInstr *MI) { /// terminator, but additionally the copies that move the vregs into the /// physical registers. static MachineBasicBlock::iterator -FindSplitPointForStackProtector(MachineBasicBlock *BB, DebugLoc DL) { +FindSplitPointForStackProtector(MachineBasicBlock *BB) { MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator(); // if (SplitPoint == BB->begin()) @@ -1442,7 +1565,7 @@ FindSplitPointForStackProtector(MachineBasicBlock *BB, DebugLoc DL) { MachineBasicBlock::iterator Previous = SplitPoint; --Previous; - while (MIIsInTerminatorSequence(Previous)) { + while (MIIsInTerminatorSequence(*Previous)) { SplitPoint = Previous; if (Previous == Start) break; @@ -1454,7 +1577,6 @@ FindSplitPointForStackProtector(MachineBasicBlock *BB, DebugLoc DL) { void SelectionDAGISel::FinishBasicBlock() { - DEBUG(dbgs() << "Total amount of phi nodes to update: " << FuncInfo->PHINodesToUpdate.size() << "\n"; for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) @@ -1474,7 +1596,23 @@ SelectionDAGISel::FinishBasicBlock() { } // Handle stack protector. - if (SDB->SPDescriptor.shouldEmitStackProtector()) { + if (SDB->SPDescriptor.shouldEmitFunctionBasedCheckStackProtector()) { + // The target provides a guard check function. There is no need to + // generate error handling code or to split current basic block. + MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB(); + + // Add load and check to the basicblock. + FuncInfo->MBB = ParentMBB; + FuncInfo->InsertPt = + FindSplitPointForStackProtector(ParentMBB); + SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + + // Clear the Per-BB State. 
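
MIIsInTerminatorSequence and FindSplitPointForStackProtector together implement a backwards scan: starting at the first terminator, keep pulling in the preceding copies to physical registers, IMPLICIT_DEFs, and DBG_VALUEs, and split before the whole sequence so the ABI-mandated return-value copies stay with the terminator. A runnable toy of the scan (simplified instruction kinds, not the LLVM API):

    #include <cassert>
    #include <cstdio>
    #include <vector>

    // Toy of FindSplitPointForStackProtector: walk backwards from the
    // terminator while we stay inside the "terminator sequence".
    enum Kind { Ordinary, CopyToPhysReg, ImplicitDef, DbgValue, Return };

    bool inTerminatorSequence(Kind K) {
      return K == CopyToPhysReg || K == ImplicitDef || K == DbgValue;
    }

    size_t findSplitPoint(const std::vector<Kind> &MBB) {
      size_t Split = MBB.size() - 1;        // index of the terminator
      while (Split > 0 && inTerminatorSequence(MBB[Split - 1]))
        --Split;                            // pull the feeding copies along
      return Split;
    }

    int main() {
      // add; copy-to-physreg; dbg_value; ret  =>  split before the copy.
      std::vector<Kind> MBB = {Ordinary, CopyToPhysReg, DbgValue, Return};
      std::printf("split at %zu\n", findSplitPoint(MBB));
      assert(findSplitPoint(MBB) == 1);
    }
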
+ SDB->SPDescriptor.resetPerBBState(); + } else if (SDB->SPDescriptor.shouldEmitStackProtector()) { MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB(); MachineBasicBlock *SuccessMBB = SDB->SPDescriptor.getSuccessMBB(); @@ -1485,7 +1623,7 @@ SelectionDAGISel::FinishBasicBlock() { // register allocation issues caused by us splitting the parent mbb. The // register allocator will clean up said virtual copies later on. MachineBasicBlock::iterator SplitPoint = - FindSplitPointForStackProtector(ParentMBB, SDB->getCurDebugLoc()); + FindSplitPointForStackProtector(ParentMBB); // Splice the terminator of ParentMBB into SuccessMBB. SuccessMBB->splice(SuccessMBB->end(), ParentMBB, @@ -1502,7 +1640,7 @@ SelectionDAGISel::FinishBasicBlock() { // CodeGen Failure MBB if we have not codegened it yet. MachineBasicBlock *FailureMBB = SDB->SPDescriptor.getFailureMBB(); - if (!FailureMBB->size()) { + if (FailureMBB->empty()) { FuncInfo->MBB = FailureMBB; FuncInfo->InsertPt = FailureMBB->end(); SDB->visitSPDescriptorFailure(SDB->SPDescriptor); @@ -1515,52 +1653,61 @@ SelectionDAGISel::FinishBasicBlock() { SDB->SPDescriptor.resetPerBBState(); } - for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) { + // Lower each BitTestBlock. + for (auto &BTB : SDB->BitTestCases) { // Lower header first, if it wasn't already lowered - if (!SDB->BitTestCases[i].Emitted) { + if (!BTB.Emitted) { // Set the current basic block to the mbb we wish to insert the code into - FuncInfo->MBB = SDB->BitTestCases[i].Parent; + FuncInfo->MBB = BTB.Parent; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code - SDB->visitBitTestHeader(SDB->BitTestCases[i], FuncInfo->MBB); + SDB->visitBitTestHeader(BTB, FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); } - BranchProbability UnhandledProb = SDB->BitTestCases[i].Prob; - for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { - UnhandledProb -= SDB->BitTestCases[i].Cases[j].ExtraProb; + BranchProbability UnhandledProb = BTB.Prob; + for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) { + UnhandledProb -= BTB.Cases[j].ExtraProb; // Set the current basic block to the mbb we wish to insert the code into - FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB; + FuncInfo->MBB = BTB.Cases[j].ThisBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code // If all cases cover a contiguous range, it is not necessary to jump to // the default block after the last bit test fails. This is because the // range check during bit test header creation has guaranteed that every - // case here doesn't go outside the range. + // case here doesn't go outside the range. In this case, there is no need + // to perform the last bit test, as it will always be true. Instead, make + // the second-to-last bit-test fall through to the target of the last bit + // test, and delete the last bit test. + MachineBasicBlock *NextMBB; - if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej) - NextMBB = SDB->BitTestCases[i].Cases[j + 1].TargetBB; - else if (j + 1 != ej) - NextMBB = SDB->BitTestCases[i].Cases[j + 1].ThisBB; - else - NextMBB = SDB->BitTestCases[i].Default; + if (BTB.ContiguousRange && j + 2 == ej) { + // Second-to-last bit-test with contiguous range: fall through to the + // target of the final bit test. + NextMBB = BTB.Cases[j + 1].TargetBB; + } else if (j + 1 == ej) { + // For the last bit test, fall through to Default. + NextMBB = BTB.Default; + } else { + // Otherwise, fall through to the next bit test. 
+ NextMBB = BTB.Cases[j + 1].ThisBB; + } - SDB->visitBitTestCase(SDB->BitTestCases[i], - NextMBB, - UnhandledProb, - SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j], + SDB->visitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); - if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej) + if (BTB.ContiguousRange && j + 2 == ej) { + // Since we're not going to use the final bit test, remove it. + BTB.Cases.pop_back(); break; + } } // Update PHI Nodes @@ -1571,16 +1718,18 @@ SelectionDAGISel::FinishBasicBlock() { assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); // This is "default" BB. We have two jumps to it. From "header" BB and - // from last "case" BB. - if (PHIBB == SDB->BitTestCases[i].Default) - PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second) - .addMBB(SDB->BitTestCases[i].Parent) - .addReg(FuncInfo->PHINodesToUpdate[pi].second) - .addMBB(SDB->BitTestCases[i].Cases.back().ThisBB); + // from last "case" BB, unless the latter was skipped. + if (PHIBB == BTB.Default) { + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(BTB.Parent); + if (!BTB.ContiguousRange) { + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second) + .addMBB(BTB.Cases.back().ThisBB); + } + } // One of "cases" BB. - for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); + for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) { - MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB; + MachineBasicBlock* cBB = BTB.Cases[j].ThisBB; if (cBB->isSuccessor(PHIBB)) PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(cBB); } @@ -1685,7 +1834,6 @@ SelectionDAGISel::FinishBasicBlock() { SDB->SwitchCases.clear(); } - /// Create the scheduler. If a specific scheduler was specified /// via the SchedulerRegistry, use it, otherwise select the /// one preferred by the target. @@ -1764,8 +1912,8 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, /// SelectInlineAsmMemoryOperands - Calls to this are automatically generated /// by tblgen. Others should not call it. -void SelectionDAGISel:: -SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, SDLoc DL) { +void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, + const SDLoc &DL) { std::vector<SDValue> InOps; std::swap(InOps, Ops); @@ -1802,15 +1950,15 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, SDLoc DL) { // Otherwise, this is a memory operand. Ask the target to select it. std::vector<SDValue> SelOps; - if (SelectInlineAsmMemoryOperand(InOps[i+1], - InlineAsm::getMemoryConstraintID(Flags), - SelOps)) + unsigned ConstraintID = InlineAsm::getMemoryConstraintID(Flags); + if (SelectInlineAsmMemoryOperand(InOps[i+1], ConstraintID, SelOps)) report_fatal_error("Could not match memory address. Inline asm" " failure!"); // Add this to the output node. 
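
The ContiguousRange handling above is worth a concrete illustration: once the range check has pinned the switch value inside the covered interval, failing every bit test but the last proves the last one must succeed, so its block can be dropped (hence the pop_back and the conditional PHI edge) and the previous test branches straight to its target. A standalone model, with a two-bucket switch over the contiguous cases 0..5:

    #include <cassert>
    #include <cstdint>

    // Cases 0..5 lowered as bit tests. After the range check (x <= 5),
    // testing the "even" mask is enough: if it fails, x must be odd, so a
    // final "odd" bit test would always be true.
    int lowered(uint32_t x) {
      if (x > 5) return 2;                  // range check -> default
      const uint32_t EvenMask = 0b010101;   // cases {0, 2, 4}
      if ((uint32_t(1) << x) & EvenMask)
        return 0;                           // even bucket
      return 1;                             // odd bucket, no final test needed
    }

    int main() {
      for (uint32_t x = 0; x <= 5; ++x)
        assert(lowered(x) == int(x & 1));
      assert(lowered(7) == 2);
    }
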
unsigned NewFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()); + NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID); Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32)); Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); i += 2; @@ -1956,7 +2104,7 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains); } -SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { +void SelectionDAGISel::Select_INLINEASM(SDNode *N) { SDLoc DL(N); std::vector<SDValue> Ops(N->op_begin(), N->op_end()); @@ -1965,11 +2113,11 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { const EVT VTs[] = {MVT::Other, MVT::Glue}; SDValue New = CurDAG->getNode(ISD::INLINEASM, DL, VTs, Ops); New->setNodeId(-1); - return New.getNode(); + ReplaceUses(N, New.getNode()); + CurDAG->RemoveDeadNode(N); } -SDNode -*SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { +void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { SDLoc dl(Op); MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); @@ -1979,11 +2127,11 @@ SDNode SDValue New = CurDAG->getCopyFromReg( Op->getOperand(0), dl, Reg, Op->getValueType(0)); New->setNodeId(-1); - return New.getNode(); + ReplaceUses(Op, New.getNode()); + CurDAG->RemoveDeadNode(Op); } -SDNode -*SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) { +void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) { SDLoc dl(Op); MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); @@ -1993,13 +2141,12 @@ SDNode SDValue New = CurDAG->getCopyToReg( Op->getOperand(0), dl, Reg, Op->getOperand(2)); New->setNodeId(-1); - return New.getNode(); + ReplaceUses(Op, New.getNode()); + CurDAG->RemoveDeadNode(Op); } - - -SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) { - return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0)); +void SelectionDAGISel::Select_UNDEF(SDNode *N) { + CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); } /// GetVBR - decode a vbr encoding whose top bit is set. @@ -2019,15 +2166,11 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { return Val; } - -/// UpdateChainsAndGlue - When a match is complete, this method updates uses of -/// interior glue and chain results to use the new glue and chain results. -void SelectionDAGISel:: -UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, - const SmallVectorImpl<SDNode*> &ChainNodesMatched, - SDValue InputGlue, - const SmallVectorImpl<SDNode*> &GlueResultNodesMatched, - bool isMorphNodeTo) { +/// When a match is complete, this method updates uses of interior chain results +/// to use the new results. +void SelectionDAGISel::UpdateChains( + SDNode *NodeToMatch, SDValue InputChain, + const SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) { SmallVector<SDNode*, 4> NowDeadNodes; // Now that all the normal results are replaced, we replace the chain and @@ -2039,10 +2182,8 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, // Replace all the chain results with the final chain we ended up with. for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { SDNode *ChainNode = ChainNodesMatched[i]; - - // If this node was already deleted, don't look at it. 
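
With Select and the Select_* helpers now returning void, each routine finishes the job itself: build the replacement, ReplaceUses, then RemoveDeadNode, rather than handing a node back for DoInstructionSelection to patch in. A minimal use-list model of that contract (toy Node type; the real RemoveDeadNode also recurses into operands that become dead):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct Node {
      std::vector<Node *> Ops;              // operands (uses of other nodes)
      int Uses = 0;
    };

    void replaceUses(Node *From, Node *To, std::vector<Node *> &AllNodes) {
      for (Node *N : AllNodes)
        for (Node *&Op : N->Ops)
          if (Op == From) { Op = To; --From->Uses; ++To->Uses; }
    }

    void removeDeadNode(Node *N, std::vector<Node *> &AllNodes) {
      if (N->Uses != 0) return;             // still referenced: keep it
      AllNodes.erase(std::find(AllNodes.begin(), AllNodes.end(), N));
      for (Node *Op : N->Ops) --Op->Uses;   // release operand uses (a full
    }                                       // version would recurse here)

    int main() {
      Node A, Old, New, Root;
      Old.Ops = {&A}; A.Uses = 1;
      Root.Ops = {&Old}; Old.Uses = 1;
      std::vector<Node *> DAG = {&A, &Old, &New, &Root};
      replaceUses(&Old, &New, DAG);         // Root now points at New
      removeDeadNode(&Old, DAG);            // Old is unused: drop it
      assert(Root.Ops[0] == &New && DAG.size() == 3);
    }
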
- if (ChainNode->getOpcode() == ISD::DELETED_NODE) - continue; + assert(ChainNode->getOpcode() != ISD::DELETED_NODE && + "Deleted node left in chain"); // Don't replace the results of the root node if we're doing a // MorphNodeTo. @@ -2056,35 +2197,12 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain); // If the node became dead and we haven't already seen it, delete it. - if (ChainNode->use_empty() && + if (ChainNode != NodeToMatch && ChainNode->use_empty() && !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode)) NowDeadNodes.push_back(ChainNode); } } - // If the result produces glue, update any glue results in the matched - // pattern with the glue result. - if (InputGlue.getNode()) { - // Handle any interior nodes explicitly marked. - for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) { - SDNode *FRN = GlueResultNodesMatched[i]; - - // If this node was already deleted, don't look at it. - if (FRN->getOpcode() == ISD::DELETED_NODE) - continue; - - assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue && - "Doesn't have a glue result"); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1), - InputGlue); - - // If the node became dead and we haven't already seen it, delete it. - if (FRN->use_empty() && - !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN)) - NowDeadNodes.push_back(FRN); - } - } - if (!NowDeadNodes.empty()) CurDAG->RemoveDeadNodes(NowDeadNodes); @@ -2108,8 +2226,9 @@ enum ChainResult { /// already selected nodes "below" us. static ChainResult WalkChainUsers(const SDNode *ChainedNode, - SmallVectorImpl<SDNode*> &ChainedNodesInPattern, - SmallVectorImpl<SDNode*> &InteriorChainedNodes) { + SmallVectorImpl<SDNode *> &ChainedNodesInPattern, + DenseMap<const SDNode *, ChainResult> &TokenFactorResult, + SmallVectorImpl<SDNode *> &InteriorChainedNodes) { ChainResult Result = CR_Simple; for (SDNode::use_iterator UI = ChainedNode->use_begin(), @@ -2190,7 +2309,15 @@ WalkChainUsers(const SDNode *ChainedNode, // as a new TokenFactor. // // To distinguish these two cases, do a recursive walk down the uses. - switch (WalkChainUsers(User, ChainedNodesInPattern, InteriorChainedNodes)) { + auto MemoizeResult = TokenFactorResult.find(User); + bool Visited = MemoizeResult != TokenFactorResult.end(); + // Recursively walk chain users only if the result is not memoized. + if (!Visited) { + auto Res = WalkChainUsers(User, ChainedNodesInPattern, TokenFactorResult, + InteriorChainedNodes); + MemoizeResult = TokenFactorResult.insert(std::make_pair(User, Res)).first; + } + switch (MemoizeResult->second) { case CR_Simple: // If the uses of the TokenFactor are just already-selected nodes, ignore // it, it is "below" our pattern. @@ -2210,9 +2337,10 @@ WalkChainUsers(const SDNode *ChainedNode, // ultimate chain result of the generated code. We will also add its chain // inputs as inputs to the ultimate TokenFactor we create. Result = CR_LeadsToInteriorNode; - ChainedNodesInPattern.push_back(User); - InteriorChainedNodes.push_back(User); - continue; + if (!Visited) { + ChainedNodesInPattern.push_back(User); + InteriorChainedNodes.push_back(User); + } } return Result; @@ -2227,12 +2355,16 @@ WalkChainUsers(const SDNode *ChainedNode, static SDValue HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, SelectionDAG *CurDAG) { + // Used for memoization. Without it WalkChainUsers could take exponential + // time to run. 
+ DenseMap<const SDNode *, ChainResult> TokenFactorResult; // Walk all of the chained nodes we've matched, recursively scanning down the // users of the chain result. This adds any TokenFactor nodes that are caught // in between chained nodes to the chained and interior nodes list. SmallVector<SDNode*, 3> InteriorChainedNodes; for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched, + TokenFactorResult, InteriorChainedNodes) == CR_InducesCycle) return SDValue(); // Would induce a cycle. } @@ -2322,8 +2454,10 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, // Otherwise, no replacement happened because the node already exists. Replace // Uses of the old node with the new one. - if (Res != Node) + if (Res != Node) { CurDAG->ReplaceAllUsesWith(Node, Res); + CurDAG->RemoveDeadNode(Node); + } return Res; } @@ -2534,7 +2668,6 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, } namespace { - struct MatchScope { /// FailIndex - If this match fails, this is the index to continue with. unsigned FailIndex; @@ -2552,7 +2685,7 @@ struct MatchScope { SDValue InputChain, InputGlue; /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty. - bool HasChainNodesMatched, HasGlueResultNodesMatched; + bool HasChainNodesMatched; }; /// \\brief A DAG update listener to keep the matching state @@ -2591,11 +2724,11 @@ public: J.setNode(E); } }; -} +} // end anonymous namespace -SDNode *SelectionDAGISel:: -SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, - unsigned TableSize) { +void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, + const unsigned char *MatcherTable, + unsigned TableSize) { // FIXME: Should these even be selected? Handle these cases in the caller? switch (NodeToMatch->getOpcode()) { default: @@ -2623,16 +2756,25 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case ISD::LIFETIME_START: case ISD::LIFETIME_END: NodeToMatch->setNodeId(-1); // Mark selected. - return nullptr; + return; case ISD::AssertSext: case ISD::AssertZext: CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0)); - return nullptr; - case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch); - case ISD::READ_REGISTER: return Select_READ_REGISTER(NodeToMatch); - case ISD::WRITE_REGISTER: return Select_WRITE_REGISTER(NodeToMatch); - case ISD::UNDEF: return Select_UNDEF(NodeToMatch); + CurDAG->RemoveDeadNode(NodeToMatch); + return; + case ISD::INLINEASM: + Select_INLINEASM(NodeToMatch); + return; + case ISD::READ_REGISTER: + Select_READ_REGISTER(NodeToMatch); + return; + case ISD::WRITE_REGISTER: + Select_WRITE_REGISTER(NodeToMatch); + return; + case ISD::UNDEF: + Select_UNDEF(NodeToMatch); + return; } assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); @@ -2665,7 +2807,6 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // which ones they are. The result is captured into this list so that we can // update the chain results when the pattern is complete. 
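
The new TokenFactorResult map is the heart of the WalkChainUsers change: each TokenFactor is expanded once and its ChainResult reused, so ladders of diamond-shaped token factors no longer blow up the recursion. A runnable demonstration of the effect, assuming nothing beyond the standard library:

    #include <cstdio>
    #include <unordered_map>
    #include <vector>

    // Count DFS expansions over a ladder of diamonds. Unmemoized, expansions
    // double per diamond (exponential); memoized, each node expands once.
    struct Node { std::vector<Node *> Users; };
    static long Visits = 0;

    int walk(Node *N, std::unordered_map<Node *, int> *Memo) {
      if (Memo) {
        auto It = Memo->find(N);
        if (It != Memo->end()) return It->second;  // reuse the cached result
      }
      ++Visits;                                    // "expanding" this node
      int R = 0;
      for (Node *U : N->Users) R += walk(U, Memo);
      if (Memo) (*Memo)[N] = R;
      return R;
    }

    int main() {
      // Build 20 diamonds: N -> {A, B} -> next N -> ...
      std::vector<Node> Pool(3 * 20 + 1);
      for (int i = 0; i < 20; ++i) {
        Node *N = &Pool[3 * i], *A = &Pool[3 * i + 1], *B = &Pool[3 * i + 2];
        N->Users = {A, B};
        A->Users = B->Users = {&Pool[3 * i + 3]};
      }
      Visits = 0; walk(&Pool[0], nullptr);
      std::printf("unmemoized visits: %ld\n", Visits);  // ~4.2 million
      std::unordered_map<Node *, int> Memo;
      Visits = 0; walk(&Pool[0], &Memo);
      std::printf("memoized visits:   %ld\n", Visits);  // 61, one per node
    }
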
SmallVector<SDNode*, 3> ChainNodesMatched; - SmallVector<SDNode*, 3> GlueResultNodesMatched; DEBUG(dbgs() << "ISEL: Starting pattern match on root node: "; NodeToMatch->dump(CurDAG); @@ -2771,7 +2912,6 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, NewEntry.InputChain = InputChain; NewEntry.InputGlue = InputGlue; NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty(); - NewEntry.HasGlueResultNodesMatched = !GlueResultNodesMatched.empty(); MatchScopes.push_back(NewEntry); continue; } @@ -2816,6 +2956,18 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, continue; } + case OPC_MoveChild0: case OPC_MoveChild1: + case OPC_MoveChild2: case OPC_MoveChild3: + case OPC_MoveChild4: case OPC_MoveChild5: + case OPC_MoveChild6: case OPC_MoveChild7: { + unsigned ChildNo = Opcode-OPC_MoveChild0; + if (ChildNo >= N.getNumOperands()) + break; // Match fails if out of range child #. + N = N.getOperand(ChildNo); + NodeStack.push_back(N); + continue; + } + case OPC_MoveParent: // Pop the current node off the NodeStack. NodeStack.pop_back(); @@ -3028,12 +3180,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (Imm->getOpcode() == ISD::Constant) { const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue(); - Imm = CurDAG->getConstant(*Val, SDLoc(NodeToMatch), Imm.getValueType(), - true); + Imm = CurDAG->getTargetConstant(*Val, SDLoc(NodeToMatch), + Imm.getValueType()); } else if (Imm->getOpcode() == ISD::ConstantFP) { const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue(); - Imm = CurDAG->getConstantFP(*Val, SDLoc(NodeToMatch), - Imm.getValueType(), true); + Imm = CurDAG->getTargetConstantFP(*Val, SDLoc(NodeToMatch), + Imm.getValueType()); } RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second)); @@ -3041,7 +3193,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, } case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0 - case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1 + case OPC_EmitMergeInputChains1_1: // OPC_EmitMergeInputChains, 1, 1 + case OPC_EmitMergeInputChains1_2: { // OPC_EmitMergeInputChains, 1, 2 // These are space-optimized forms of OPC_EmitMergeInputChains. assert(!InputChain.getNode() && "EmitMergeInputChains should be the first chain producing node"); @@ -3049,7 +3202,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, "Should only have one EmitMergeInputChains per match"); // Read all of the chained nodes. - unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1; + unsigned RecNo = Opcode - OPC_EmitMergeInputChains1_0; assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); @@ -3137,13 +3290,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, continue; } - case OPC_EmitNode: - case OPC_MorphNodeTo: { + case OPC_EmitNode: case OPC_MorphNodeTo: + case OPC_EmitNode0: case OPC_EmitNode1: case OPC_EmitNode2: + case OPC_MorphNodeTo0: case OPC_MorphNodeTo1: case OPC_MorphNodeTo2: { uint16_t TargetOpc = MatcherTable[MatcherIndex++]; TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; unsigned EmitNodeInfo = MatcherTable[MatcherIndex++]; // Get the result VT list. - unsigned NumVTs = MatcherTable[MatcherIndex++]; + unsigned NumVTs; + // If this is one of the compressed forms, get the number of VTs based + // on the Opcode. 
Otherwise read the next byte from the table. + if (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2) + NumVTs = Opcode - OPC_MorphNodeTo0; + else if (Opcode >= OPC_EmitNode0 && Opcode <= OPC_EmitNode2) + NumVTs = Opcode - OPC_EmitNode0; + else + NumVTs = MatcherTable[MatcherIndex++]; SmallVector<EVT, 4> VTs; for (unsigned i = 0; i != NumVTs; ++i) { MVT::SimpleValueType VT = @@ -3205,7 +3367,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Create the node. SDNode *Res = nullptr; - if (Opcode != OPC_MorphNodeTo) { + bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo || + (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2); + if (!IsMorphNodeTo) { // If this is a normal EmitNode command, just create the new node and // add the results to the RecordedNodes list. Res = CurDAG->getMachineNode(TargetOpc, SDLoc(NodeToMatch), @@ -3218,13 +3382,17 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, nullptr)); } - } else if (NodeToMatch->getOpcode() != ISD::DELETED_NODE) { - Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops, EmitNodeInfo); } else { - // NodeToMatch was eliminated by CSE when the target changed the DAG. - // We will visit the equivalent node later. - DEBUG(dbgs() << "Node was eliminated by CSE\n"); - return nullptr; + assert(NodeToMatch->getOpcode() != ISD::DELETED_NODE && + "NodeToMatch was removed partway through selection"); + SelectionDAG::DAGNodeDeletedListener NDL(*CurDAG, [&](SDNode *N, + SDNode *E) { + auto &Chain = ChainNodesMatched; + assert((!E || llvm::find(Chain, N) == Chain.end()) && + "Chain node replaced during MorphNode"); + Chain.erase(std::remove(Chain.begin(), Chain.end(), N), Chain.end()); + }); + Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops, EmitNodeInfo); } // If the node had chain/glue results, update our notion of the current @@ -3285,31 +3453,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, } DEBUG(dbgs() << " " - << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created") + << (IsMorphNodeTo ? "Morphed" : "Created") << " node: "; Res->dump(CurDAG); dbgs() << "\n"); // If this was a MorphNodeTo then we're completely done! - if (Opcode == OPC_MorphNodeTo) { - // Update chain and glue uses. - UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched, - InputGlue, GlueResultNodesMatched, true); - return Res; - } - - continue; - } - - case OPC_MarkGlueResults: { - unsigned NumNodes = MatcherTable[MatcherIndex++]; - - // Read and remember all the glue-result nodes. - for (unsigned i = 0; i != NumNodes; ++i) { - unsigned RecNo = MatcherTable[MatcherIndex++]; - if (RecNo & 128) - RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); - - assert(RecNo < RecordedNodes.size() && "Invalid MarkGlueResults"); - GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); + if (IsMorphNodeTo) { + // Update chain uses. + UpdateChains(Res, InputChain, ChainNodesMatched, true); + return; } continue; } @@ -3341,20 +3492,24 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res); } - // If the root node defines glue, add it to the glue nodes to update list. - if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Glue) - GlueResultNodesMatched.push_back(NodeToMatch); + // Update chain uses. + UpdateChains(NodeToMatch, InputChain, ChainNodesMatched, false); - // Update chain and glue uses. 
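
Several of the new opcodes above are pure table compression: OPC_MoveChild0..7, OPC_EmitNode0..2, OPC_MorphNodeTo0..2, and OPC_EmitMergeInputChains1_0..2 fold a small operand into the opcode byte, and the interpreter recovers it by subtracting the base opcode. Larger operands still use the variable-width scheme GetVBR decodes. A sketch of both tricks (the opcode values here are made up; only the decoder shapes mirror the code above):

    #include <cassert>
    #include <cstdint>

    // 1) Compressed forms: the operand lives in the opcode byte itself.
    enum : uint8_t { OPC_MoveChild = 10, OPC_MoveChild0 = 11 /* ..Child7 */ };

    unsigned decodeChildNo(const uint8_t *Table, unsigned &Idx) {
      uint8_t Opcode = Table[Idx++];
      if (Opcode >= OPC_MoveChild0)        // compressed: child # is in the opcode
        return Opcode - OPC_MoveChild0;
      return Table[Idx++];                 // generic form: child # is a payload byte
    }

    // 2) VBR encoding: low 7 bits carry payload, the top bit means
    //    "another byte follows" (same shape as GetVBR).
    uint64_t getVBR(uint64_t Val, const uint8_t *Table, unsigned &Idx) {
      assert(Val >= 128 && "not a VBR value");
      Val &= 127;
      unsigned Shift = 7;
      uint8_t Next;
      do {
        Next = Table[Idx++];
        Val |= uint64_t(Next & 127) << Shift;
        Shift += 7;
      } while (Next & 128);
      return Val;
    }

    int main() {
      const uint8_t T1[] = {OPC_MoveChild, 3};    // two bytes
      const uint8_t T2[] = {OPC_MoveChild0 + 3};  // one byte, same meaning
      unsigned I1 = 0, I2 = 0;
      assert(decodeChildNo(T1, I1) == 3 && decodeChildNo(T2, I2) == 3);

      const uint8_t T3[] = {0x80 | 0x05, 0x02};   // encodes 5 + (2 << 7) = 261
      unsigned I3 = 1;
      assert(getVBR(T3[0], T3, I3) == 261);
    }
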
- UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched, - InputGlue, GlueResultNodesMatched, false); + // If the root node defines glue, we need to update it to the glue result. + // TODO: This never happens in our tests and I think it can be removed / + // replaced with an assert, but if we do it this the way the change is + // NFC. + if (NodeToMatch->getValueType(NodeToMatch->getNumValues() - 1) == + MVT::Glue && + InputGlue.getNode()) + CurDAG->ReplaceAllUsesOfValueWith( + SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1), InputGlue); assert(NodeToMatch->use_empty() && "Didn't replace all uses of the node?"); + CurDAG->RemoveDeadNode(NodeToMatch); - // FIXME: We just return here, which interacts correctly with SelectRoot - // above. We should fix this to not return an SDNode* anymore. - return nullptr; + return; } } @@ -3366,7 +3521,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, while (1) { if (MatchScopes.empty()) { CannotYetSelect(NodeToMatch); - return nullptr; + return; } // Restore the interpreter state back to the point where the scope was @@ -3387,8 +3542,6 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, InputGlue = LastScope.InputGlue; if (!LastScope.HasChainNodesMatched) ChainNodesMatched.clear(); - if (!LastScope.HasGlueResultNodesMatched) - GlueResultNodesMatched.clear(); // Check to see what the offset is at the new MatcherIndex. If it is zero // we have reached the end of this scope, otherwise we have another child @@ -3411,8 +3564,6 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, } } - - void SelectionDAGISel::CannotYetSelect(SDNode *N) { std::string msg; raw_string_ostream Msg(msg); diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp index 00db94256844e..55f70f7d9fd3f 100644 --- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp @@ -1,4 +1,4 @@ -//===-- TargetSelectionDAGInfo.cpp - SelectionDAG Info --------------------===// +//===-- SelectionDAGTargetInfo.cpp - SelectionDAG Info --------------------===// // // The LLVM Compiler Infrastructure // @@ -7,13 +7,11 @@ // //===----------------------------------------------------------------------===// // -// This implements the TargetSelectionDAGInfo class. +// This implements the SelectionDAGTargetInfo class. // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetSelectionDAGInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" using namespace llvm; -TargetSelectionDAGInfo::~TargetSelectionDAGInfo() { -} +SelectionDAGTargetInfo::~SelectionDAGTargetInfo() {} diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 02545a7306560..90aaba2472654 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -53,13 +53,10 @@ void StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) { "Trying to visit statepoint before finished processing previous one"); Locations.clear(); NextSlotToAllocate = 0; - // Need to resize this on each safepoint - we need the two to stay in - // sync and the clear patterns of a SelectionDAGBuilder have no relation - // to FunctionLoweringInfo. 
+ // Need to resize this on each safepoint - we need the two to stay in sync and + // the clear patterns of a SelectionDAGBuilder have no relation to + // FunctionLoweringInfo. SmallBitVector::reset initializes all bits to false. AllocatedStackSlots.resize(Builder.FuncInfo.StatepointStackSlots.size()); - for (size_t i = 0; i < AllocatedStackSlots.size(); i++) { - AllocatedStackSlots[i] = false; - } } void StatepointLoweringState::clear() { @@ -72,49 +69,46 @@ void StatepointLoweringState::clear() { SDValue StatepointLoweringState::allocateStackSlot(EVT ValueType, SelectionDAGBuilder &Builder) { - NumSlotsAllocatedForStatepoints++; + auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo(); - // The basic scheme here is to first look for a previously created stack slot - // which is not in use (accounting for the fact arbitrary slots may already - // be reserved), or to create a new stack slot and use it. - - // If this doesn't succeed in 40000 iterations, something is seriously wrong - for (int i = 0; i < 40000; i++) { - assert(Builder.FuncInfo.StatepointStackSlots.size() == - AllocatedStackSlots.size() && - "broken invariant"); - const size_t NumSlots = AllocatedStackSlots.size(); - assert(NextSlotToAllocate <= NumSlots && "broken invariant"); - - if (NextSlotToAllocate >= NumSlots) { - assert(NextSlotToAllocate == NumSlots); - // record stats - if (NumSlots + 1 > StatepointMaxSlotsRequired) { - StatepointMaxSlotsRequired = NumSlots + 1; - } + unsigned SpillSize = ValueType.getSizeInBits() / 8; + assert((SpillSize * 8) == ValueType.getSizeInBits() && "Size not in bytes?"); - SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType); - const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); - auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo(); - MFI->markAsStatepointSpillSlotObjectIndex(FI); + // First look for a previously created stack slot which is not in + // use (accounting for the fact arbitrary slots may already be + // reserved), or to create a new stack slot and use it. - Builder.FuncInfo.StatepointStackSlots.push_back(FI); - AllocatedStackSlots.push_back(true); - return SpillSlot; - } - if (!AllocatedStackSlots[NextSlotToAllocate]) { + const size_t NumSlots = AllocatedStackSlots.size(); + assert(NextSlotToAllocate <= NumSlots && "Broken invariant"); + + // The stack slots in StatepointStackSlots beyond the first NumSlots were + // added in this instance of StatepointLoweringState, and cannot be re-used. + assert(NumSlots <= Builder.FuncInfo.StatepointStackSlots.size() && + "Broken invariant"); + + for (; NextSlotToAllocate < NumSlots; NextSlotToAllocate++) { + if (!AllocatedStackSlots.test(NextSlotToAllocate)) { const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate]; - AllocatedStackSlots[NextSlotToAllocate] = true; - return Builder.DAG.getFrameIndex(FI, ValueType); + if (MFI->getObjectSize(FI) == SpillSize) { + AllocatedStackSlots.set(NextSlotToAllocate); + return Builder.DAG.getFrameIndex(FI, ValueType); + } } - // Note: We deliberately choose to advance this only on the failing path. - // Doing so on the succeeding path involves a bit of complexity that caused - // a minor bug previously. Unless performance shows this matters, please - // keep this code as simple as possible. 
- NextSlotToAllocate++; } - llvm_unreachable("infinite loop?"); + + // Couldn't find a free slot, so create a new one: + + SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType); + const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); + MFI->markAsStatepointSpillSlotObjectIndex(FI); + + Builder.FuncInfo.StatepointStackSlots.push_back(FI); + + StatepointMaxSlotsRequired = std::max<unsigned long>( + StatepointMaxSlotsRequired, Builder.FuncInfo.StatepointStackSlots.size()); + + return SpillSlot; } /// Utility function for reservePreviousStackSlotForValue. Tries to find @@ -125,24 +119,23 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, int LookUpDepth) { // Can not look any further - give up now if (LookUpDepth <= 0) - return Optional<int>(); + return None; // Spill location is known for gc relocates if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) { - FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = - Builder.FuncInfo.StatepointRelocatedValues[Relocate->getStatepoint()]; + const auto &SpillMap = + Builder.FuncInfo.StatepointSpillMaps[Relocate->getStatepoint()]; auto It = SpillMap.find(Relocate->getDerivedPtr()); if (It == SpillMap.end()) - return Optional<int>(); + return None; return It->second; } // Look through bitcast instructions. - if (const BitCastInst *Cast = dyn_cast<BitCastInst>(Val)) { + if (const BitCastInst *Cast = dyn_cast<BitCastInst>(Val)) return findPreviousSpillSlot(Cast->getOperand(0), Builder, LookUpDepth - 1); - } // Look through phi nodes // All incoming values should have same known stack slot, otherwise result @@ -154,10 +147,10 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, Optional<int> SpillSlot = findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1); if (!SpillSlot.hasValue()) - return Optional<int>(); + return None; if (MergedResult.hasValue() && *MergedResult != *SpillSlot) - return Optional<int>(); + return None; MergedResult = SpillSlot; } @@ -192,7 +185,7 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, // which we visit values is unspecified. // Don't know any information about this instruction - return Optional<int>(); + return None; } /// Try to find existing copies of the incoming values in stack slots used for @@ -213,7 +206,7 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, SDValue OldLocation = Builder.StatepointLowering.getLocation(Incoming); if (OldLocation.getNode()) - // duplicates in input + // Duplicates in input return; const int LookUpDepth = 6; @@ -222,14 +215,14 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, if (!Index.hasValue()) return; - auto Itr = std::find(Builder.FuncInfo.StatepointStackSlots.begin(), - Builder.FuncInfo.StatepointStackSlots.end(), *Index); - assert(Itr != Builder.FuncInfo.StatepointStackSlots.end() && - "value spilled to the unknown stack slot"); + const auto &StatepointSlots = Builder.FuncInfo.StatepointStackSlots; + + auto SlotIt = find(StatepointSlots, *Index); + assert(SlotIt != StatepointSlots.end() && + "Value spilled to the unknown stack slot"); // This is one of our dedicated lowering slots - const int Offset = - std::distance(Builder.FuncInfo.StatepointStackSlots.begin(), Itr); + const int Offset = std::distance(StatepointSlots.begin(), SlotIt); if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) { // stack slot already assigned to someone else, can't use it! 
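
The rewritten allocateStackSlot scans forward from NextSlotToAllocate for a free slot whose object size matches the spill size and only calls CreateStackTemporary when none fits; reservePreviousStackSlotForValue then biases values toward the slots they occupied at earlier statepoints so stackmap entries stay stable. A toy of the allocator's reuse discipline (note the forward-only cursor: a free slot of the wrong size is skipped for the rest of the statepoint, matching the code above):

    #include <cassert>
    #include <vector>

    struct Slots {
      std::vector<unsigned> Size;          // per-slot object size in bytes
      std::vector<bool> Used;              // taken in the current statepoint
      unsigned NextSlot = 0;

      unsigned allocate(unsigned SpillSize) {
        for (; NextSlot < Used.size(); ++NextSlot)
          if (!Used[NextSlot] && Size[NextSlot] == SpillSize) {
            Used[NextSlot] = true;
            return NextSlot++;
          }
        Size.push_back(SpillSize);         // no free matching slot: make one
        Used.push_back(true);
        return Size.size() - 1;
      }
      void startNewStatepoint() { Used.assign(Size.size(), false); NextSlot = 0; }
    };

    int main() {
      Slots S;
      unsigned A = S.allocate(8), B = S.allocate(16);
      S.startNewStatepoint();              // slots persist across statepoints
      assert(S.allocate(8) == A);          // reused: free and size matches
      assert(S.allocate(16) == B);
      assert(S.allocate(8) == 2);          // both taken: a third slot is created
      (void)A; (void)B;
    }
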
// TODO: currently we reserve space for gc arguments after doing @@ -252,24 +245,30 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, /// is not required for correctness. It's purpose is to reduce the size of /// StackMap section. It has no effect on the number of spill slots required /// or the actual lowering. -static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases, - SmallVectorImpl<const Value *> &Ptrs, - SmallVectorImpl<const Value *> &Relocs, - SelectionDAGBuilder &Builder) { - - // This is horribly inefficient, but I don't care right now - SmallSet<SDValue, 64> Seen; - - SmallVector<const Value *, 64> NewBases, NewPtrs, NewRelocs; - for (size_t i = 0; i < Ptrs.size(); i++) { +static void +removeDuplicateGCPtrs(SmallVectorImpl<const Value *> &Bases, + SmallVectorImpl<const Value *> &Ptrs, + SmallVectorImpl<const GCRelocateInst *> &Relocs, + SelectionDAGBuilder &Builder, + FunctionLoweringInfo::StatepointSpillMap &SSM) { + DenseMap<SDValue, const Value *> Seen; + + SmallVector<const Value *, 64> NewBases, NewPtrs; + SmallVector<const GCRelocateInst *, 64> NewRelocs; + for (size_t i = 0, e = Ptrs.size(); i < e; i++) { SDValue SD = Builder.getValue(Ptrs[i]); - // Only add non-duplicates - if (Seen.count(SD) == 0) { + auto SeenIt = Seen.find(SD); + + if (SeenIt == Seen.end()) { + // Only add non-duplicates NewBases.push_back(Bases[i]); NewPtrs.push_back(Ptrs[i]); NewRelocs.push_back(Relocs[i]); + Seen[SD] = Ptrs[i]; + } else { + // Duplicate pointer found, note in SSM and move on: + SSM.DuplicateMap[Ptrs[i]] = SeenIt->second; } - Seen.insert(SD); } assert(Bases.size() >= NewBases.size()); assert(Ptrs.size() >= NewPtrs.size()); @@ -284,43 +283,13 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases, /// Extract call from statepoint, lower it and return pointer to the /// call node. Also update NodeMap so that getValue(statepoint) will /// reference lowered call result -static SDNode * -lowerCallFromStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB, - SelectionDAGBuilder &Builder, - SmallVectorImpl<SDValue> &PendingExports) { - - ImmutableCallSite CS(ISP.getCallSite()); - - SDValue ActualCallee; - - if (ISP.getNumPatchBytes() > 0) { - // If we've been asked to emit a nop sequence instead of a call instruction - // for this statepoint then don't lower the call target, but use a constant - // `null` instead. Not lowering the call target lets statepoint clients get - // away without providing a physical address for the symbolic call target at - // link time. 
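
removeDuplicateGCPtrs now records where each dropped pointer went: values that lower to the same SDValue keep one representative, and the DuplicateMap entry lets a later gc.relocate of the duplicate be answered from the survivor's slot. The mechanism in miniature (strings standing in for llvm::Value and SDValue):

    #include <cassert>
    #include <string>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    int main() {
      // Two distinct IR values ("p1", "p2") lowering to the same SDValue "%x".
      std::vector<std::pair<std::string, std::string>> Ptrs = {
          {"p1", "%x"}, {"q", "%y"}, {"p2", "%x"}};
      std::vector<std::string> Kept;
      std::unordered_map<std::string, std::string> Seen;         // SDValue -> value
      std::unordered_map<std::string, std::string> DuplicateMap; // dup -> survivor

      for (auto &[V, SD] : Ptrs) {
        auto It = Seen.find(SD);
        if (It == Seen.end()) {
          Kept.push_back(V);               // first sighting: keep it
          Seen[SD] = V;
        } else {
          DuplicateMap[V] = It->second;    // duplicate: forward to the survivor
        }
      }
      assert(Kept.size() == 2 && DuplicateMap.at("p2") == "p1");
    }
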
- - const auto &TLI = Builder.DAG.getTargetLoweringInfo(); - const auto &DL = Builder.DAG.getDataLayout(); - - unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace(); - ActualCallee = Builder.DAG.getConstant(0, Builder.getCurSDLoc(), - TLI.getPointerTy(DL, AS)); - } else - ActualCallee = Builder.getValue(ISP.getCalledValue()); - - assert(CS.getCallingConv() != CallingConv::AnyReg && - "anyregcc is not supported on statepoints!"); - - Type *DefTy = ISP.getActualReturnType(); - bool HasDef = !DefTy->isVoidTy(); +static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo( + SelectionDAGBuilder::StatepointLoweringInfo &SI, + SelectionDAGBuilder &Builder, SmallVectorImpl<SDValue> &PendingExports) { SDValue ReturnValue, CallEndVal; - std::tie(ReturnValue, CallEndVal) = Builder.lowerCallOperands( - ISP.getCallSite(), ImmutableStatepoint::CallArgsBeginPos, - ISP.getNumCallArgs(), ActualCallee, DefTy, EHPadBB, - false /* IsPatchPoint */); - + std::tie(ReturnValue, CallEndVal) = + Builder.lowerInvokable(SI.CLI, SI.EHPadBB); SDNode *CallEnd = CallEndVal.getNode(); // Get a call instruction from the call sequence chain. Tail calls are not @@ -339,6 +308,7 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB, // to grab the return value from the return register(s), or it can be a LOAD // to load a value returned by reference via a stack slot. + bool HasDef = !SI.CLI.RetTy->isVoidTy(); if (HasDef) { if (CallEnd->getOpcode() == ISD::LOAD) CallEnd = CallEnd->getOperand(0).getNode(); @@ -348,70 +318,7 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB, } assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!"); - - // Export the result value if needed - const Instruction *GCResult = ISP.getGCResult(); - if (HasDef && GCResult) { - if (GCResult->getParent() != CS.getParent()) { - // Result value will be used in a different basic block so we need to - // export it now. - // Default exporting mechanism will not work here because statepoint call - // has a different type than the actual call. It means that by default - // llvm will create export register of the wrong type (always i32 in our - // case). So instead we need to create export register with correct type - // manually. - // TODO: To eliminate this problem we can remove gc.result intrinsics - // completely and make statepoint call to return a tuple. - unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType()); - RegsForValue RFV( - *Builder.DAG.getContext(), Builder.DAG.getTargetLoweringInfo(), - Builder.DAG.getDataLayout(), Reg, ISP.getActualReturnType()); - SDValue Chain = Builder.DAG.getEntryNode(); - - RFV.getCopyToRegs(ReturnValue, Builder.DAG, Builder.getCurSDLoc(), Chain, - nullptr); - PendingExports.push_back(Chain); - Builder.FuncInfo.ValueMap[CS.getInstruction()] = Reg; - } else { - // Result value will be used in a same basic block. Don't export it or - // perform any explicit register copies. - // We'll replace the actuall call node shortly. gc_result will grab - // this value. 
- Builder.setValue(CS.getInstruction(), ReturnValue); - } - } else { - // The token value is never used from here on, just generate a poison value - Builder.setValue(CS.getInstruction(), - Builder.DAG.getIntPtrConstant(-1, Builder.getCurSDLoc())); - } - - return CallEnd->getOperand(0).getNode(); -} - -/// Callect all gc pointers coming into statepoint intrinsic, clean them up, -/// and return two arrays: -/// Bases - base pointers incoming to this statepoint -/// Ptrs - derived pointers incoming to this statepoint -/// Relocs - the gc_relocate corresponding to each base/ptr pair -/// Elements of this arrays should be in one-to-one correspondence with each -/// other i.e Bases[i], Ptrs[i] are from the same gcrelocate call -static void getIncomingStatepointGCValues( - SmallVectorImpl<const Value *> &Bases, SmallVectorImpl<const Value *> &Ptrs, - SmallVectorImpl<const Value *> &Relocs, ImmutableStatepoint StatepointSite, - SelectionDAGBuilder &Builder) { - for (const GCRelocateInst *Relocate : StatepointSite.getRelocates()) { - Relocs.push_back(Relocate); - Bases.push_back(Relocate->getBasePtr()); - Ptrs.push_back(Relocate->getDerivedPtr()); - } - - // Remove any redundant llvm::Values which map to the same SDValue as another - // input. Also has the effect of removing duplicates in the original - // llvm::Value input list as well. This is a useful optimization for - // reducing the size of the StackMap section. It has no other impact. - removeDuplicatesGCPtrs(Bases, Ptrs, Relocs, Builder); - - assert(Bases.size() == Ptrs.size() && Ptrs.size() == Relocs.size()); + return std::make_pair(ReturnValue, CallEnd->getOperand(0).getNode()); } /// Spill a value incoming to the statepoint. It might be either part of @@ -429,7 +336,6 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, if (!Loc.getNode()) { Loc = Builder.StatepointLowering.allocateStackSlot(Incoming.getValueType(), Builder); - assert(isa<FrameIndexSDNode>(Loc)); int Index = cast<FrameIndexSDNode>(Loc)->getIndex(); // We use TargetFrameIndex so that isel will not select it into LEA Loc = Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType()); @@ -437,10 +343,22 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, // TODO: We can create TokenFactor node instead of // chaining stores one after another, this may allow // a bit more optimal scheduling for them + +#ifndef NDEBUG + // Right now we always allocate spill slots that are of the same + // size as the value we're about to spill (the size of spillee can + // vary since we spill vectors of pointers too). At some point we + // can consider allowing spills of smaller values to larger slots + // (i.e. change the '==' in the assert below to a '>='). + auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo(); + assert((MFI->getObjectSize(Index) * 8) == + Incoming.getValueType().getSizeInBits() && + "Bad spill: stack slot does not match!"); +#endif + Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc, MachinePointerInfo::getFixedStack( - Builder.DAG.getMachineFunction(), Index), - false, false, 0); + Builder.DAG.getMachineFunction(), Index)); Builder.StatepointLowering.setLocation(Incoming, Loc); } @@ -478,8 +396,7 @@ static void lowerIncomingStatepointValue(SDValue Incoming, // spill location. This would be a useful optimization, but would // need to be optional since it requires a lot of complexity on the // runtime side which not all would support. 
- std::pair<SDValue, SDValue> Res = - spillIncomingStatepointValue(Incoming, Chain, Builder); + auto Res = spillIncomingStatepointValue(Incoming, Chain, Builder); Ops.push_back(Res.first); Chain = Res.second; } @@ -494,43 +411,37 @@ static void lowerIncomingStatepointValue(SDValue Incoming, /// completion, 'Ops' will contain ready to use operands for machine code /// statepoint. The chain nodes will have already been created and the DAG root /// will be set to the last value spilled (if any were). -static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, - ImmutableStatepoint StatepointSite, - SelectionDAGBuilder &Builder) { - - // Lower the deopt and gc arguments for this statepoint. Layout will - // be: deopt argument length, deopt arguments.., gc arguments... - - SmallVector<const Value *, 64> Bases, Ptrs, Relocations; - getIncomingStatepointGCValues(Bases, Ptrs, Relocations, StatepointSite, - Builder); - +static void +lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, + SelectionDAGBuilder::StatepointLoweringInfo &SI, + SelectionDAGBuilder &Builder) { + // Lower the deopt and gc arguments for this statepoint. Layout will be: + // deopt argument length, deopt arguments.., gc arguments... #ifndef NDEBUG - // Check that each of the gc pointer and bases we've gotten out of the - // safepoint is something the strategy thinks might be a pointer (or vector - // of pointers) into the GC heap. This is basically just here to help catch - // errors during statepoint insertion. TODO: This should actually be in the - // Verifier, but we can't get to the GCStrategy from there (yet). - GCStrategy &S = Builder.GFI->getStrategy(); - for (const Value *V : Bases) { - auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); - if (Opt.hasValue()) { - assert(Opt.getValue() && - "non gc managed base pointer found in statepoint"); - } - } - for (const Value *V : Ptrs) { - auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); - if (Opt.hasValue()) { - assert(Opt.getValue() && - "non gc managed derived pointer found in statepoint"); + if (auto *GFI = Builder.GFI) { + // Check that each of the gc pointer and bases we've gotten out of the + // safepoint is something the strategy thinks might be a pointer (or vector + // of pointers) into the GC heap. This is basically just here to help catch + // errors during statepoint insertion. TODO: This should actually be in the + // Verifier, but we can't get to the GCStrategy from there (yet). + GCStrategy &S = GFI->getStrategy(); + for (const Value *V : SI.Bases) { + auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed base pointer found in statepoint"); + } } - } - for (const Value *V : Relocations) { - auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); - if (Opt.hasValue()) { - assert(Opt.getValue() && "non gc managed pointer relocated"); + for (const Value *V : SI.Ptrs) { + auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed derived pointer found in statepoint"); + } } + } else { + assert(SI.Bases.empty() && "No gc specified, so cannot relocate pointers!"); + assert(SI.Ptrs.empty() && "No gc specified, so cannot relocate pointers!"); } #endif @@ -539,30 +450,23 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // particular value. This is purely an optimization over the code below and // doesn't change semantics at all. 
It is important for performance that we // reserve slots for both deopt and gc values before lowering either. - for (const Value *V : StatepointSite.vm_state_args()) { + for (const Value *V : SI.DeoptState) { reservePreviousStackSlotForValue(V, Builder); } - for (unsigned i = 0; i < Bases.size(); ++i) { - reservePreviousStackSlotForValue(Bases[i], Builder); - reservePreviousStackSlotForValue(Ptrs[i], Builder); + for (unsigned i = 0; i < SI.Bases.size(); ++i) { + reservePreviousStackSlotForValue(SI.Bases[i], Builder); + reservePreviousStackSlotForValue(SI.Ptrs[i], Builder); } // First, prefix the list with the number of unique values to be // lowered. Note that this is the number of *Values* not the // number of SDValues required to lower them. - const int NumVMSArgs = StatepointSite.getNumTotalVMSArgs(); + const int NumVMSArgs = SI.DeoptState.size(); pushStackMapConstant(Ops, Builder, NumVMSArgs); - assert(NumVMSArgs == std::distance(StatepointSite.vm_state_begin(), - StatepointSite.vm_state_end())); - - // The vm state arguments are lowered in an opaque manner. We do - // not know what type of values are contained within. We skip the - // first one since that happens to be the total number we lowered - // explicitly just above. We could have left it in the loop and - // not done it explicitly, but it's far easier to understand this - // way. - for (const Value *V : StatepointSite.vm_state_args()) { + // The vm state arguments are lowered in an opaque manner. We do not know + // what type of values are contained within. + for (const Value *V : SI.DeoptState) { SDValue Incoming = Builder.getValue(V); lowerIncomingStatepointValue(Incoming, Ops, Builder); } @@ -572,11 +476,11 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // arrays interwoven with each (lowered) base pointer immediately followed by // it's (lowered) derived pointer. i.e // (base[0], ptr[0], base[1], ptr[1], ...) - for (unsigned i = 0; i < Bases.size(); ++i) { - const Value *Base = Bases[i]; + for (unsigned i = 0; i < SI.Bases.size(); ++i) { + const Value *Base = SI.Bases[i]; lowerIncomingStatepointValue(Builder.getValue(Base), Ops, Builder); - const Value *Ptr = Ptrs[i]; + const Value *Ptr = SI.Ptrs[i]; lowerIncomingStatepointValue(Builder.getValue(Ptr), Ops, Builder); } @@ -585,7 +489,7 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // allocas and give control over placement to the consumer. In this case, // it is the contents of the slot which may get updated, not the pointer to // the alloca - for (Value *V : StatepointSite.gc_args()) { + for (Value *V : SI.GCArgs) { SDValue Incoming = Builder.getValue(V); if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { // This handles allocas as arguments to the statepoint @@ -597,18 +501,16 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // Record computed locations for all lowered values. // This can not be embedded in lowering loops as we need to record *all* // values, while previous loops account only values with unique SDValues. 
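
For reference, the meta-argument block assembled here has a fixed shape: a constant holding the number of deopt values, the opaquely lowered deopt values, each base pointer immediately followed by its derived pointer, then the gc allocas. A toy assembly of that layout (plain ints standing in for SDValues, which in the real code may each expand to a constant marker, a register, or a spill-slot index):

    #include <cassert>
    #include <vector>

    int main() {
      std::vector<int> Deopt = {101, 102};
      std::vector<int> Bases = {1, 3}, Ptrs = {2, 4}, GCArgs = {9};

      std::vector<int> Ops;
      Ops.push_back(int(Deopt.size()));      // #deopt values first
      for (int V : Deopt) Ops.push_back(V);  // opaque deopt state
      for (size_t i = 0; i < Bases.size(); ++i) {
        Ops.push_back(Bases[i]);             // each base immediately followed
        Ops.push_back(Ptrs[i]);              // by its derived pointer
      }
      for (int V : GCArgs) Ops.push_back(V); // then pointers to alloca'd slots

      std::vector<int> Expect = {2, 101, 102, 1, 2, 3, 4, 9};
      assert(Ops == Expect);
    }
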
- const Instruction *StatepointInstr = - StatepointSite.getCallSite().getInstruction(); - FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = - Builder.FuncInfo.StatepointRelocatedValues[StatepointInstr]; + const Instruction *StatepointInstr = SI.StatepointInstr; + auto &SpillMap = Builder.FuncInfo.StatepointSpillMaps[StatepointInstr]; - for (const GCRelocateInst *Relocate : StatepointSite.getRelocates()) { + for (const GCRelocateInst *Relocate : SI.GCRelocates) { const Value *V = Relocate->getDerivedPtr(); SDValue SDV = Builder.getValue(V); SDValue Loc = Builder.StatepointLowering.getLocation(SDV); if (Loc.getNode()) { - SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex(); + SpillMap.SlotMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex(); } else { // Record value as visited, but not spilled. This is case for allocas // and constants. For this values we can avoid emitting spill load while @@ -616,7 +518,7 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // Actually we do not need to record them in this map at all. // We do this only to check that we are not relocating any unvisited // value. - SpillMap[V] = None; + SpillMap.SlotMap[V] = None; // Default llvm mechanisms for exporting values which are used in // different basic blocks does not work for gc relocates. @@ -630,16 +532,8 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, } } -void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { - // Check some preconditions for sanity - assert(isStatepoint(&CI) && - "function called must be the statepoint function"); - - LowerStatepoint(ImmutableStatepoint(&CI)); -} - -void SelectionDAGBuilder::LowerStatepoint( - ImmutableStatepoint ISP, const BasicBlock *EHPadBB /*= nullptr*/) { +SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( + SelectionDAGBuilder::StatepointLoweringInfo &SI) { // The basic scheme here is that information about both the original call and // the safepoint is encoded in the CallInst. We create a temporary call and // lower it, then reverse engineer the calling sequence. @@ -648,36 +542,36 @@ void SelectionDAGBuilder::LowerStatepoint( // Clear state StatepointLowering.startNewStatepoint(*this); - ImmutableCallSite CS(ISP.getCallSite()); - #ifndef NDEBUG - // Consistency check. Check only relocates in the same basic block as thier - // statepoint. - for (const User *U : CS->users()) { - const CallInst *Call = cast<CallInst>(U); - if (isa<GCRelocateInst>(Call) && Call->getParent() == CS.getParent()) - StatepointLowering.scheduleRelocCall(*Call); - } + // We schedule gc relocates before removeDuplicateGCPtrs since we _will_ + // encounter the duplicate gc relocates we elide in removeDuplicateGCPtrs. + for (auto *Reloc : SI.GCRelocates) + if (Reloc->getParent() == SI.StatepointInstr->getParent()) + StatepointLowering.scheduleRelocCall(*Reloc); #endif -#ifndef NDEBUG - // If this is a malformed statepoint, report it early to simplify debugging. - // This should catch any IR level mistake that's made when constructing or - // transforming statepoints. - ISP.verify(); - - // Check that the associated GCStrategy expects to encounter statepoints. - assert(GFI->getStrategy().useStatepoints() && - "GCStrategy does not expect to encounter statepoints"); -#endif + // Remove any redundant llvm::Values which map to the same SDValue as another + // input. Also has the effect of removing duplicates in the original + // llvm::Value input list as well. This is a useful optimization for + // reducing the size of the StackMap section. 
It has no other impact. + removeDuplicateGCPtrs(SI.Bases, SI.Ptrs, SI.GCRelocates, *this, + FuncInfo.StatepointSpillMaps[SI.StatepointInstr]); + assert(SI.Bases.size() == SI.Ptrs.size() && + SI.Ptrs.size() == SI.GCRelocates.size()); // Lower statepoint vmstate and gcstate arguments SmallVector<SDValue, 10> LoweredMetaArgs; - lowerStatepointMetaArgs(LoweredMetaArgs, ISP, *this); + lowerStatepointMetaArgs(LoweredMetaArgs, SI, *this); + + // Now that we've emitted the spills, we need to update the root so that the + // call sequence is ordered correctly. + SI.CLI.setChain(getRoot()); // Get call node, we will replace it later with statepoint - SDNode *CallNode = - lowerCallFromStatepoint(ISP, EHPadBB, *this, PendingExports); + SDValue ReturnVal; + SDNode *CallNode; + std::tie(ReturnVal, CallNode) = + lowerCallFromStatepointLoweringInfo(SI, *this, PendingExports); // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END // nodes with all the appropriate arguments and return values. @@ -700,8 +594,8 @@ void SelectionDAGBuilder::LowerStatepoint( // followed by a SRCVALUE for the pointer that may be used during lowering // (e.g. to form MachinePointerInfo values for loads/stores). const bool IsGCTransition = - (ISP.getFlags() & (uint64_t)StatepointFlags::GCTransition) == - (uint64_t)StatepointFlags::GCTransition; + (SI.StatepointFlags & (uint64_t)StatepointFlags::GCTransition) == + (uint64_t)StatepointFlags::GCTransition; if (IsGCTransition) { SmallVector<SDValue, 8> TSOps; @@ -709,7 +603,7 @@ void SelectionDAGBuilder::LowerStatepoint( TSOps.push_back(Chain); // Add GC transition arguments - for (const Value *V : ISP.gc_transition_args()) { + for (const Value *V : SI.GCTransitionArgs) { TSOps.push_back(getValue(V)); if (V->getType()->isPointerTy()) TSOps.push_back(DAG.getSrcValue(V)); @@ -734,9 +628,9 @@ void SelectionDAGBuilder::LowerStatepoint( SmallVector<SDValue, 40> Ops; // Add the <id> and <numBytes> constants. 
- Ops.push_back(DAG.getTargetConstant(ISP.getID(), getCurSDLoc(), MVT::i64)); + Ops.push_back(DAG.getTargetConstant(SI.ID, getCurSDLoc(), MVT::i64)); Ops.push_back( - DAG.getTargetConstant(ISP.getNumPatchBytes(), getCurSDLoc(), MVT::i32)); + DAG.getTargetConstant(SI.NumPatchBytes, getCurSDLoc(), MVT::i32)); // Calculate and push starting position of vmstate arguments // Get number of arguments incoming directly into call node @@ -758,13 +652,12 @@ void SelectionDAGBuilder::LowerStatepoint( Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt); // Add a constant argument for the calling convention - pushStackMapConstant(Ops, *this, CS.getCallingConv()); + pushStackMapConstant(Ops, *this, SI.CLI.CallConv); // Add a constant argument for the flags - uint64_t Flags = ISP.getFlags(); - assert( - ((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0) - && "unknown flag used"); + uint64_t Flags = SI.StatepointFlags; + assert(((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0) && + "Unknown flag used"); pushStackMapConstant(Ops, *this, Flags); // Insert all vmstate and gcstate arguments @@ -800,7 +693,7 @@ void SelectionDAGBuilder::LowerStatepoint( TEOps.push_back(SDValue(StatepointMCNode, 0)); // Add GC transition arguments - for (const Value *V : ISP.gc_transition_args()) { + for (const Value *V : SI.GCTransitionArgs) { TEOps.push_back(getValue(V)); if (V->getType()->isPointerTy()) TEOps.push_back(DAG.getSrcValue(V)); @@ -830,19 +723,154 @@ void SelectionDAGBuilder::LowerStatepoint( // return value of each gc.relocate to the respective output of the // previously emitted STATEPOINT value. Unfortunately, this doesn't appear // to actually be possible today. + + return ReturnVal; +} + +void +SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, + const BasicBlock *EHPadBB /*= nullptr*/) { + assert(ISP.getCallSite().getCallingConv() != CallingConv::AnyReg && + "anyregcc is not supported on statepoints!"); + +#ifndef NDEBUG + // If this is a malformed statepoint, report it early to simplify debugging. + // This should catch any IR level mistake that's made when constructing or + // transforming statepoints. + ISP.verify(); + + // Check that the associated GCStrategy expects to encounter statepoints. + assert(GFI->getStrategy().useStatepoints() && + "GCStrategy does not expect to encounter statepoints"); +#endif + + SDValue ActualCallee; + + if (ISP.getNumPatchBytes() > 0) { + // If we've been asked to emit a nop sequence instead of a call instruction + // for this statepoint then don't lower the call target, but use a constant + // `null` instead. Not lowering the call target lets statepoint clients get + // away without providing a physical address for the symbolic call target at + // link time. 
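As the comment above explains, a patchable statepoint never emits a real call, so the target choice reduces to the tiny decision sketched below; pickCalleeAddress is a hypothetical name, not LLVM API, and the hunk that follows implements the same decision on SDValues:

#include <cstdint>

// If a nop sled of NumPatchBytes is requested, no call is emitted, so the
// callee operand is never read; a null constant avoids forcing the symbolic
// target to resolve at link time. Otherwise the real address is lowered.
std::uint64_t pickCalleeAddress(unsigned NumPatchBytes,
                                std::uint64_t ResolvedSymbolAddr) {
  return NumPatchBytes > 0 ? 0 : ResolvedSymbolAddr;
}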
+ + const auto &TLI = DAG.getTargetLoweringInfo(); + const auto &DL = DAG.getDataLayout(); + + unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace(); + ActualCallee = DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(DL, AS)); + } else { + ActualCallee = getValue(ISP.getCalledValue()); + } + + StatepointLoweringInfo SI(DAG); + populateCallLoweringInfo(SI.CLI, ISP.getCallSite(), + ImmutableStatepoint::CallArgsBeginPos, + ISP.getNumCallArgs(), ActualCallee, + ISP.getActualReturnType(), false /* IsPatchPoint */); + + for (const GCRelocateInst *Relocate : ISP.getRelocates()) { + SI.GCRelocates.push_back(Relocate); + SI.Bases.push_back(Relocate->getBasePtr()); + SI.Ptrs.push_back(Relocate->getDerivedPtr()); + } + + SI.GCArgs = ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end()); + SI.StatepointInstr = ISP.getInstruction(); + SI.GCTransitionArgs = + ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end()); + SI.ID = ISP.getID(); + SI.DeoptState = ArrayRef<const Use>(ISP.vm_state_begin(), ISP.vm_state_end()); + SI.StatepointFlags = ISP.getFlags(); + SI.NumPatchBytes = ISP.getNumPatchBytes(); + SI.EHPadBB = EHPadBB; + + SDValue ReturnValue = LowerAsSTATEPOINT(SI); + + // Export the result value if needed + const GCResultInst *GCResult = ISP.getGCResult(); + Type *RetTy = ISP.getActualReturnType(); + if (!RetTy->isVoidTy() && GCResult) { + if (GCResult->getParent() != ISP.getCallSite().getParent()) { + // Result value will be used in a different basic block, so we need to + // export it now. The default exporting mechanism will not work here + // because the statepoint call has a different type than the actual call. + // That means that by default llvm will create an export register of the + // wrong type (always i32 in our case), so instead we need to create an + // export register with the correct type manually. + // TODO: To eliminate this problem we can remove gc.result intrinsics + // completely and make the statepoint call return a tuple. + unsigned Reg = FuncInfo.CreateRegs(RetTy); + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), + DAG.getDataLayout(), Reg, RetTy); + SDValue Chain = DAG.getEntryNode(); + + RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr); + PendingExports.push_back(Chain); + FuncInfo.ValueMap[ISP.getInstruction()] = Reg; + } else { + // Result value will be used in the same basic block. Don't export it or + // perform any explicit register copies. + // We'll replace the actual call node shortly. gc_result will grab + // this value. + setValue(ISP.getInstruction(), ReturnValue); + } + } else { + // The token value is never used from here on; just generate a poison value. + setValue(ISP.getInstruction(), DAG.getIntPtrConstant(-1, getCurSDLoc())); + } +} + +void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl( + ImmutableCallSite CS, SDValue Callee, const BasicBlock *EHPadBB, + bool VarArgDisallowed, bool ForceVoidReturnTy) { + StatepointLoweringInfo SI(DAG); + unsigned ArgBeginIndex = CS.arg_begin() - CS.getInstruction()->op_begin(); + populateCallLoweringInfo( + SI.CLI, CS, ArgBeginIndex, CS.getNumArgOperands(), Callee, + ForceVoidReturnTy ? 
Type::getVoidTy(*DAG.getContext()) : CS.getType(), + false); + if (!VarArgDisallowed) + SI.CLI.IsVarArg = CS.getFunctionType()->isVarArg(); + + auto DeoptBundle = *CS.getOperandBundle(LLVMContext::OB_deopt); + + unsigned DefaultID = StatepointDirectives::DeoptBundleStatepointID; + + auto SD = parseStatepointDirectivesFromAttrs(CS.getAttributes()); + SI.ID = SD.StatepointID.getValueOr(DefaultID); + SI.NumPatchBytes = SD.NumPatchBytes.getValueOr(0); + + SI.DeoptState = + ArrayRef<const Use>(DeoptBundle.Inputs.begin(), DeoptBundle.Inputs.end()); + SI.StatepointFlags = static_cast<uint64_t>(StatepointFlags::None); + SI.EHPadBB = EHPadBB; + + // NB! The GC arguments are deliberately left empty. + + if (SDValue ReturnVal = LowerAsSTATEPOINT(SI)) { + const Instruction *Inst = CS.getInstruction(); + ReturnVal = lowerRangeToAssertZExt(DAG, *Inst, ReturnVal); + setValue(Inst, ReturnVal); + } } -void SelectionDAGBuilder::visitGCResult(const CallInst &CI) { +void SelectionDAGBuilder::LowerCallSiteWithDeoptBundle( + ImmutableCallSite CS, SDValue Callee, const BasicBlock *EHPadBB) { + LowerCallSiteWithDeoptBundleImpl(CS, Callee, EHPadBB, + /* VarArgDisallowed = */ false, + /* ForceVoidReturnTy = */ false); +} + +void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) { // The result value of the gc_result is simply the result of the actual // call. We've already emitted this, so just grab the value. - Instruction *I = cast<Instruction>(CI.getArgOperand(0)); - assert(isStatepoint(I) && "first argument must be a statepoint token"); + const Instruction *I = CI.getStatepoint(); if (I->getParent() != CI.getParent()) { // Statepoint is in a different basic block, so we should have stored the // call result in a virtual register. // We cannot use the default getValue() functionality to copy the value from this - // register because statepoint and actuall call return types can be + // register because statepoint and actual call return types can be // different, and getValue() will use CopyFromReg of the wrong type, // which is always i32 in our case. PointerType *CalleeType = cast<PointerType>( @@ -864,20 +892,21 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { // We skip this check for relocates not in the same basic block as their // statepoint. It would be too expensive to preserve validation info through // different basic blocks. - if (Relocate.getStatepoint()->getParent() == Relocate.getParent()) { + if (Relocate.getStatepoint()->getParent() == Relocate.getParent()) StatepointLowering.relocCallVisited(Relocate); - } + + auto *Ty = Relocate.getType()->getScalarType(); + if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty)) + assert(*IsManaged && "Non gc managed pointer relocated!"); #endif const Value *DerivedPtr = Relocate.getDerivedPtr(); SDValue SD = getValue(DerivedPtr); - FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = - FuncInfo.StatepointRelocatedValues[Relocate.getStatepoint()]; - - // We should have recorded location for this pointer - assert(SpillMap.count(DerivedPtr) && "Relocating not lowered gc value"); - Optional<int> DerivedPtrLocation = SpillMap[DerivedPtr]; + auto &SpillMap = FuncInfo.StatepointSpillMaps[Relocate.getStatepoint()]; + auto SlotIt = SpillMap.find(DerivedPtr); + assert(SlotIt != SpillMap.end() && "Relocating not lowered gc value"); + Optional<int> DerivedPtrLocation = SlotIt->second; // We didn't need to spill these special cases (constants and allocas). // See the handling in spillIncomingValueForStatepoint for details.
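The SpillMap protocol used above distinguishes "visited but not spilled" from "never lowered"; here is a self-contained model of that lookup, with std::optional standing in for llvm::Optional and const void* for the value key (names illustrative only):

#include <cassert>
#include <optional>
#include <unordered_map>

using SpillSlotMap = std::unordered_map<const void *, std::optional<int>>;

// Present-and-empty means "visited but never spilled" (constants, allocas),
// so no reload is needed; absent means the value was never lowered, which
// the assert treats as a relocation bug.
std::optional<int> lookupSpillSlot(const SpillSlotMap &M, const void *Ptr) {
  auto It = M.find(Ptr);
  assert(It != M.end() && "relocating a gc value that was never lowered");
  return It->second;
}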
@@ -897,8 +926,7 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { SDValue SpillLoad = DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), - *DerivedPtrLocation), - false, false, false, 0); + *DerivedPtrLocation)); // Again, be conservative, don't emit pending loads DAG.setRoot(SpillLoad.getValue(1)); @@ -906,3 +934,25 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { assert(SpillLoad.getNode()); setValue(&Relocate, SpillLoad); } + +void SelectionDAGBuilder::LowerDeoptimizeCall(const CallInst *CI) { + const auto &TLI = DAG.getTargetLoweringInfo(); + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::DEOPTIMIZE), + TLI.getPointerTy(DAG.getDataLayout())); + + // We don't lower calls to __llvm_deoptimize as varargs, but as a regular + // call. We also do not lower the return value to any virtual register, and + // change the immediately following return to a trap instruction. + LowerCallSiteWithDeoptBundleImpl(CI, Callee, /* EHPadBB = */ nullptr, + /* VarArgDisallowed = */ true, + /* ForceVoidReturnTy = */ true); +} + +void SelectionDAGBuilder::LowerDeoptimizingReturn() { + // We do not lower the return value from llvm.deoptimize to any virtual + // register, and change the immediately following return to a trap + // instruction. + if (DAG.getTarget().Options.TrapUnreachable) + DAG.setRoot( + DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); +} diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.h b/lib/CodeGen/SelectionDAG/StatepointLowering.h index 82d0c62f1c30d..b043184003a09 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.h +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.h @@ -16,9 +16,9 @@ #define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include <vector> namespace llvm { class SelectionDAGBuilder; @@ -45,15 +45,17 @@ public: /// statepoint. Will return SDValue() if this value hasn't been /// spilled. Otherwise, the value has already been spilled and no /// further action is required by the caller. - SDValue getLocation(SDValue val) { - if (!Locations.count(val)) + SDValue getLocation(SDValue Val) { + auto I = Locations.find(Val); + if (I == Locations.end()) return SDValue(); - return Locations[val]; + return I->second; } - void setLocation(SDValue val, SDValue Location) { - assert(!Locations.count(val) && + + void setLocation(SDValue Val, SDValue Location) { + assert(!Locations.count(Val) && "Trying to allocate already allocated location"); - Locations[val] = Location; + Locations[Val] = Location; } /// Record the fact that we expect to encounter a given gc_relocate @@ -62,16 +64,15 @@ public: void scheduleRelocCall(const CallInst &RelocCall) { PendingGCRelocateCalls.push_back(&RelocCall); } + /// Remove this gc_relocate from the list we're expecting to see /// before the next statepoint. If we weren't expecting to see /// it, we'll report an assertion. 
void relocCallVisited(const CallInst &RelocCall) { - SmallVectorImpl<const CallInst *>::iterator itr = - std::find(PendingGCRelocateCalls.begin(), PendingGCRelocateCalls.end(), - &RelocCall); - assert(itr != PendingGCRelocateCalls.end() && + auto I = find(PendingGCRelocateCalls, &RelocCall); + assert(I != PendingGCRelocateCalls.end() && "Visited unexpected gcrelocate call"); - PendingGCRelocateCalls.erase(itr); + PendingGCRelocateCalls.erase(I); } // TODO: Should add consistency tracking to ensure we encounter @@ -84,14 +85,15 @@ public: void reserveStackSlot(int Offset) { assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() && "out of bounds"); - assert(!AllocatedStackSlots[Offset] && "already reserved!"); + assert(!AllocatedStackSlots.test(Offset) && "already reserved!"); assert(NextSlotToAllocate <= (unsigned)Offset && "consistency!"); - AllocatedStackSlots[Offset] = true; + AllocatedStackSlots.set(Offset); } + bool isStackSlotAllocated(int Offset) { assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() && "out of bounds"); - return AllocatedStackSlots[Offset]; + return AllocatedStackSlots.test(Offset); } private: @@ -103,7 +105,7 @@ private: /// whether it has been used in the current statepoint. Since we try to /// preserve stack slots across safepoints, there can be gaps in which /// slots have been allocated. - SmallVector<bool, 50> AllocatedStackSlots; + SmallBitVector AllocatedStackSlots; /// Points just beyond the last slot known to have been allocated unsigned NextSlotToAllocate; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index c64d882d69a46..f2bc88a985974 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -14,10 +14,11 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -25,7 +26,6 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -43,6 +43,10 @@ const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; } +bool TargetLowering::isPositionIndependent() const { + return getTargetMachine().isPositionIndependent(); +} + /// Check whether a given call node is in tail position within its function. If /// so, it sets Chain to the input chain of the tail call. bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, @@ -65,6 +69,31 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, return isUsedByReturnOnly(Node, Chain); } +bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, + const uint32_t *CallerPreservedMask, + const SmallVectorImpl<CCValAssign> &ArgLocs, + const SmallVectorImpl<SDValue> &OutVals) const { + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + const CCValAssign &ArgLoc = ArgLocs[I]; + if (!ArgLoc.isRegLoc()) + continue; + unsigned Reg = ArgLoc.getLocReg(); + // Only look at callee saved registers. 
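The StatepointLowering.h hunk above moves AllocatedStackSlots from SmallVector<bool> to SmallBitVector; the bookkeeping contract is small enough to model standalone before the CSR-matching loop continues below. std::vector<bool> stands in for SmallBitVector, and SlotTracker is a hypothetical name:

#include <cassert>
#include <vector>

struct SlotTracker {
  std::vector<bool> Allocated;     // one bit per reusable stack slot
  unsigned NextSlotToAllocate = 0; // points past the last allocated slot

  void reserve(int Offset) {
    assert(Offset >= 0 && Offset < (int)Allocated.size() && "out of bounds");
    assert(!Allocated[Offset] && "already reserved!");
    assert(NextSlotToAllocate <= (unsigned)Offset && "consistency!");
    Allocated[Offset] = true;
  }

  bool isAllocated(int Offset) const {
    assert(Offset >= 0 && Offset < (int)Allocated.size() && "out of bounds");
    return Allocated[Offset];
  }
};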
+ if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg)) + continue; + // Check that we pass the value used for the caller. + // (We look for a CopyFromReg reading a virtual register that is used + // for the function live-in value of register Reg) + SDValue Value = OutVals[I]; + if (Value->getOpcode() != ISD::CopyFromReg) + return false; + unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg(); + if (MRI.getLiveInPhysReg(ArgReg) != Reg) + return false; + } + return true; +} + /// \brief Set CallLoweringInfo attribute flags based on a call instruction /// and called function attributes. void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS, @@ -77,17 +106,17 @@ void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS, isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); + isSwiftSelf = CS->paramHasAttr(AttrIdx, Attribute::SwiftSelf); + isSwiftError = CS->paramHasAttr(AttrIdx, Attribute::SwiftError); Alignment = CS->getParamAlignment(AttrIdx); } /// Generate a libcall taking the given operands as arguments and returning a /// result of type RetVT. std::pair<SDValue, SDValue> -TargetLowering::makeLibCall(SelectionDAG &DAG, - RTLIB::Libcall LC, EVT RetVT, - ArrayRef<SDValue> Ops, - bool isSigned, SDLoc dl, - bool doesNotReturn, +TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, + ArrayRef<SDValue> Ops, bool isSigned, + const SDLoc &dl, bool doesNotReturn, bool isReturnValueUsed) const { TargetLowering::ArgListTy Args; Args.reserve(Ops.size()); @@ -110,7 +139,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, TargetLowering::CallLoweringInfo CLI(DAG); bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) - .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed) .setSExtResult(signExtend).setZExtResult(!signExtend); return LowerCallTo(CLI); @@ -121,8 +150,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - SDLoc dl) const { - assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) + const SDLoc &dl) const { + assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128) && "Unsupported setcc type!"); // Expand into one or more soft-fp libcall(s). @@ -132,53 +161,65 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, case ISD::SETEQ: case ISD::SETOEQ: LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : - (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; + (VT == MVT::f64) ? RTLIB::OEQ_F64 : + (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128; break; case ISD::SETNE: case ISD::SETUNE: LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : - (VT == MVT::f64) ? RTLIB::UNE_F64 : RTLIB::UNE_F128; + (VT == MVT::f64) ? RTLIB::UNE_F64 : + (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128; break; case ISD::SETGE: case ISD::SETOGE: LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : - (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128; + (VT == MVT::f64) ? RTLIB::OGE_F64 : + (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128; break; case ISD::SETLT: case ISD::SETOLT: LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : - (VT == MVT::f64) ? 
RTLIB::OLT_F64 : RTLIB::OLT_F128; + (VT == MVT::f64) ? RTLIB::OLT_F64 : + (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128; break; case ISD::SETLE: case ISD::SETOLE: LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : - (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128; + (VT == MVT::f64) ? RTLIB::OLE_F64 : + (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128; break; case ISD::SETGT: case ISD::SETOGT: LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : - (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + (VT == MVT::f64) ? RTLIB::OGT_F64 : + (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128; break; case ISD::SETUO: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : - (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128; + (VT == MVT::f64) ? RTLIB::UO_F64 : + (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128; break; case ISD::SETO: LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : - (VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128; + (VT == MVT::f64) ? RTLIB::O_F64 : + (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128; break; case ISD::SETONE: // SETONE = SETOLT | SETOGT LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : - (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + (VT == MVT::f64) ? RTLIB::OLT_F64 : + (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128; LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : - (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + (VT == MVT::f64) ? RTLIB::OGT_F64 : + (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128; break; case ISD::SETUEQ: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : - (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128; + (VT == MVT::f64) ? RTLIB::UO_F64 : + (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128; LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : - (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; + (VT == MVT::f64) ? RTLIB::OEQ_F64 : + (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128; break; default: // Invert CC for unordered comparisons @@ -186,19 +227,23 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, switch (CCCode) { case ISD::SETULT: LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : - (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128; + (VT == MVT::f64) ? RTLIB::OGE_F64 : + (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128; break; case ISD::SETULE: LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : - (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + (VT == MVT::f64) ? RTLIB::OGT_F64 : + (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128; break; case ISD::SETUGT: LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : - (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128; + (VT == MVT::f64) ? RTLIB::OLE_F64 : + (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128; break; case ISD::SETUGE: LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : - (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + (VT == MVT::f64) ? RTLIB::OLT_F64 : + (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128; break; default: llvm_unreachable("Do not know how to soften this setcc!"); } @@ -235,7 +280,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. unsigned TargetLowering::getJumpTableEncoding() const { // In non-pic modes, just use the address of a block. - if (getTargetMachine().getRelocationModel() != Reloc::PIC_) + if (!isPositionIndependent()) return MachineJumpTableInfo::EK_BlockAddress; // In PIC mode, if the target supports a GPRel32 directive, use it.
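To make the softened-setcc table above concrete: a sketch of what SETONE on f32 amounts to after expansion, assuming the usual libgcc soft-float comparison routines (return-value conventions as commented; illustrative only, not the DAG code itself):

extern "C" int __ltsf2(float, float); // < 0 iff ordered and a < b
extern "C" int __gtsf2(float, float); // > 0 iff ordered and a > b

// SETONE has no single libcall, so LC1 and LC2 above are combined:
// ordered-and-not-equal == (a < b) || (a > b).
bool setone_f32(float A, float B) {
  return __ltsf2(A, B) < 0 || __gtsf2(A, B) > 0;
}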
@@ -269,17 +314,20 @@ TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, bool TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { - // Assume that everything is safe in static mode. - if (getTargetMachine().getRelocationModel() == Reloc::Static) - return true; + const TargetMachine &TM = getTargetMachine(); + const GlobalValue *GV = GA->getGlobal(); - // In dynamic-no-pic mode, assume that known defined values are safe. - if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC && - GA && GA->getGlobal()->isStrongDefinitionForLinker()) - return true; + // If the address is not even local to this DSO we will have to load it from + // a got and then add the offset. + if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) + return false; - // Otherwise assume nothing is safe. - return false; + // If the code is position independent we will have to add a base register. + if (isPositionIndependent()) + return false; + + // Otherwise we can do it. + return true; } //===----------------------------------------------------------------------===// @@ -326,11 +374,10 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. /// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be /// generalized for targets with other types of implicit widening casts. -bool -TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, - unsigned BitWidth, - const APInt &Demanded, - SDLoc dl) { +bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, + unsigned BitWidth, + const APInt &Demanded, + const SDLoc &dl) { assert(Op.getNumOperands() == 2 && "ShrinkDemandedOp only supports binary operators!"); assert(Op.getNode()->getNumValues() == 1 && @@ -407,7 +454,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, NewMask = APInt::getAllOnesValue(BitWidth); } else if (DemandedMask == 0) { // Not demanding any bits from Op. - if (Op.getOpcode() != ISD::UNDEF) + if (!Op.isUndef()) return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType())); return false; } else if (Depth == 6) { // Limit search depth. @@ -1157,37 +1204,6 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, return 1; } -/// Test if the given value is known to have exactly one bit set. This differs -/// from computeKnownBits in that it doesn't need to determine which bit is set. -static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { - // A left-shift of a constant one will have exactly one bit set, because - // shifting the bit off the end is undefined. - if (Val.getOpcode() == ISD::SHL) - if (ConstantSDNode *C = - dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0))) - if (C->getAPIntValue() == 1) - return true; - - // Similarly, a right-shift of a constant sign-bit will have exactly - // one bit set. - if (Val.getOpcode() == ISD::SRL) - if (ConstantSDNode *C = - dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0))) - if (C->getAPIntValue().isSignBit()) - return true; - - // More could be done here, though the above checks are enough - // to handle some common cases. - - // Fall back to computeKnownBits to catch other known cases. 
- EVT OpVT = Val.getValueType(); - unsigned BitWidth = OpVT.getScalarType().getSizeInBits(); - APInt KnownZero, KnownOne; - DAG.computeKnownBits(Val, KnownZero, KnownOne); - return (KnownZero.countPopulation() == BitWidth - 1) && - (KnownOne.countPopulation() == 1); -} - bool TargetLowering::isConstTrueVal(const SDNode *N) const { if (!N) return false; @@ -1242,12 +1258,91 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const { return CN->isNullValue(); } +bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT, + bool SExt) const { + if (VT == MVT::i1) + return N->isOne(); + + TargetLowering::BooleanContent Cnt = getBooleanContents(VT); + switch (Cnt) { + case TargetLowering::ZeroOrOneBooleanContent: + // An extended value of 1 is always true, unless its original type is i1, + // in which case it will be sign extended to -1. + return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1)); + case TargetLowering::UndefinedBooleanContent: + case TargetLowering::ZeroOrNegativeOneBooleanContent: + return N->isAllOnesValue() && SExt; + } + llvm_unreachable("Unexpected enumeration."); +} + +/// This helper function of SimplifySetCC tries to optimize the comparison when +/// either operand of the SetCC node is a bitwise-and instruction. +SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, + DAGCombinerInfo &DCI, + const SDLoc &DL) const { + // Match these patterns in any of their permutations: + // (X & Y) == Y + // (X & Y) != Y + if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND) + std::swap(N0, N1); + + EVT OpVT = N0.getValueType(); + if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() || + (Cond != ISD::SETEQ && Cond != ISD::SETNE)) + return SDValue(); + + SDValue X, Y; + if (N0.getOperand(0) == N1) { + X = N0.getOperand(1); + Y = N0.getOperand(0); + } else if (N0.getOperand(1) == N1) { + X = N0.getOperand(0); + Y = N0.getOperand(1); + } else { + return SDValue(); + } + + SelectionDAG &DAG = DCI.DAG; + SDValue Zero = DAG.getConstant(0, DL, OpVT); + if (DAG.isKnownToBeAPowerOfTwo(Y)) { + // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set. + // Note that where Y is variable and is known to have at most one bit set + // (for example, if it is Z & 1) we cannot do this; the expressions are not + // equivalent when Y == 0. + Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(Cond, N0.getSimpleValueType())) + return DAG.getSetCC(DL, VT, N0, Zero, Cond); + } else if (N0.hasOneUse() && hasAndNotCompare(Y)) { + // If the target supports an 'and-not' or 'and-complement' logic operation, + // try to use that to make a comparison operation more efficient. + // But don't do this transform if the mask is a single bit because there are + // more efficient ways to deal with that case (for example, 'bt' on x86 or + // 'rlwinm' on PPC). + + // Bail out if the compare operand that we want to turn into a zero is + // already a zero (otherwise, infinite loop). + auto *YConst = dyn_cast<ConstantSDNode>(Y); + if (YConst && YConst->isNullValue()) + return SDValue(); + + // Transform this into: ~X & Y == 0. + SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT); + SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y); + return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond); + } + + return SDValue(); +} + /// Try to simplify a setcc built with the specified operands and cc. If it is /// unable to simplify it, return a null SDValue. 
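Before SimplifySetCC itself in the next chunk, a scalar illustration of the power-of-two fold that simplifySetCCWithAnd above implements; self-contained and illustrative only:

#include <cassert>
#include <cstdint>

bool viaEquality(std::uint32_t X, std::uint32_t Y) { return (X & Y) == Y; }
bool viaZeroTest(std::uint32_t X, std::uint32_t Y) { return (X & Y) != 0; }

int main() {
  const std::uint32_t Y = 1u << 5; // known power of two
  for (std::uint32_t X : {0u, 31u, 32u, 33u, ~0u})
    assert(viaEquality(X, Y) == viaZeroTest(X, Y)); // the fold is sound
  // Why the power-of-two proof obligation matters: with Y == 0 the two
  // forms disagree, so "at most one bit set" is not a strong enough fact.
  assert(viaEquality(7, 0) && !viaZeroTest(7, 0));
  return 0;
}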
-SDValue -TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, - ISD::CondCode Cond, bool foldBooleans, - DAGCombinerInfo &DCI, SDLoc dl) const { +SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, bool foldBooleans, + DAGCombinerInfo &DCI, + const SDLoc &dl) const { SelectionDAG &DAG = DCI.DAG; // These setcc operations always fold. @@ -1376,6 +1471,38 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT); return DAG.getSetCC(dl, VT, Trunc, C, Cond); } + + // If truncating the setcc operands is not desirable, we can still + // simplify the expression in some cases: + // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc) + // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc)) + // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc)) + // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc) + // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc)) + // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc) + SDValue TopSetCC = N0->getOperand(0); + unsigned N0Opc = N0->getOpcode(); + bool SExt = (N0Opc == ISD::SIGN_EXTEND); + if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 && + TopSetCC.getOpcode() == ISD::SETCC && + (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) && + (isConstFalseVal(N1C) || + isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) { + + bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) || + (!N1C->isNullValue() && Cond == ISD::SETNE); + + if (!Inverse) + return TopSetCC; + + ISD::CondCode InvCond = ISD::getSetCCInverse( + cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(), + TopSetCC.getOperand(0).getValueType().isInteger()); + return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0), + TopSetCC.getOperand(1), + InvCond); + + } } } @@ -1426,9 +1553,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), DAG.getConstant(bestOffset, dl, PtrType)); unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); - SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, - Lod->getPointerInfo().getWithOffset(bestOffset), - false, false, false, NewAlign); + SDValue NewLoad = DAG.getLoad( + newVT, dl, Lod->getChain(), Ptr, + Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign); return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, DAG.getConstant(bestMask.trunc(bestWidth), @@ -1994,32 +2121,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } - // Simplify x&y == y to x&y != 0 if y has exactly one bit set. - // Note that where y is variable and is known to have at most - // one bit set (for example, if it is z&1) we cannot do this; - // the expressions are not equivalent when y==0. 
- if (N0.getOpcode() == ISD::AND) - if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) { - if (ValueHasExactlyOneBitSet(N1, DAG)) { - Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); - if (DCI.isBeforeLegalizeOps() || - isCondCodeLegal(Cond, N0.getSimpleValueType())) { - SDValue Zero = DAG.getConstant(0, dl, N1.getValueType()); - return DAG.getSetCC(dl, VT, N0, Zero, Cond); - } - } - } - if (N1.getOpcode() == ISD::AND) - if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) { - if (ValueHasExactlyOneBitSet(N0, DAG)) { - Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); - if (DCI.isBeforeLegalizeOps() || - isCondCodeLegal(Cond, N1.getSimpleValueType())) { - SDValue Zero = DAG.getConstant(0, dl, N0.getValueType()); - return DAG.getSetCC(dl, VT, N1, Zero, Cond); - } - } - } + if (SDValue V = simplifySetCCWithAnd(VT, N0, N1, Cond, DCI, dl)) + return V; } // Fold away ALL boolean setcc's. @@ -2202,8 +2305,10 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, C = dyn_cast<ConstantSDNode>(Op.getOperand(0)); GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1)); } - if (!C || !GA) - C = nullptr, GA = nullptr; + if (!C || !GA) { + C = nullptr; + GA = nullptr; + } } // If we find a valid operand, map to the TargetXXX version so that the @@ -2260,7 +2365,7 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) { - if (RegName.equals_lower(RI->getName(*I))) { + if (RegName.equals_lower(RI->getRegAsmName(*I))) { std::pair<unsigned, const TargetRegisterClass*> S = std::make_pair(*I, RC); @@ -2680,7 +2785,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, /// \brief Given an exact SDIV by a constant, create a multiplication /// with the multiplicative inverse of the constant. 
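The next chunk shows BuildExactSDIV; as a worked instance of the multiplicative-inverse trick it names (odd-divisor case only; the real code first shifts out factors of two, and works on signed values), with the constant checked mod 2^32:

#include <cassert>
#include <cstdint>

int main() {
  // 7 * 3067833783 == 1 (mod 2^32), so an exact x / 7 is x * 3067833783.
  const std::uint32_t D = 7u, DInv = 3067833783u;
  assert(D * DInv == 1u); // unsigned multiplication wraps mod 2^32
  for (std::uint32_t Q : {0u, 1u, 123456u, 613566756u})
    assert((Q * D) * DInv == Q); // exact multiples divide back losslessly
  return 0;
}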
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, - SDLoc dl, SelectionDAG &DAG, + const SDLoc &dl, SelectionDAG &DAG, std::vector<SDNode *> &Created) { assert(d != 0 && "Division by zero!"); @@ -3039,6 +3144,370 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, return true; } +SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, + SelectionDAG &DAG) const { + SDLoc SL(LD); + SDValue Chain = LD->getChain(); + SDValue BasePTR = LD->getBasePtr(); + EVT SrcVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = LD->getExtensionType(); + + unsigned NumElem = SrcVT.getVectorNumElements(); + + EVT SrcEltVT = SrcVT.getScalarType(); + EVT DstEltVT = LD->getValueType(0).getScalarType(); + + unsigned Stride = SrcEltVT.getSizeInBits() / 8; + assert(SrcEltVT.isByteSized()); + + EVT PtrVT = BasePTR.getValueType(); + + SmallVector<SDValue, 8> Vals; + SmallVector<SDValue, 8> LoadChains; + + for (unsigned Idx = 0; Idx < NumElem; ++Idx) { + SDValue ScalarLoad = + DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR, + LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride), + LD->getMemOperand()->getFlags(), LD->getAAInfo()); + + BasePTR = DAG.getNode(ISD::ADD, SL, PtrVT, BasePTR, + DAG.getConstant(Stride, SL, PtrVT)); + + Vals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains); + SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, SL, LD->getValueType(0), Vals); + + return DAG.getMergeValues({ Value, NewChain }, SL); +} + +// FIXME: This relies on each element having a byte size, otherwise the stride +// is 0 and just overwrites the same location. ExpandStore currently expects +// this broken behavior. +SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, + SelectionDAG &DAG) const { + SDLoc SL(ST); + + SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); + SDValue Value = ST->getValue(); + EVT StVT = ST->getMemoryVT(); + + // The type of the data we want to save + EVT RegVT = Value.getValueType(); + EVT RegSclVT = RegVT.getScalarType(); + + // The type of data as saved in memory. + EVT MemSclVT = StVT.getScalarType(); + + EVT PtrVT = BasePtr.getValueType(); + + // Store Stride in bytes + unsigned Stride = MemSclVT.getSizeInBits() / 8; + EVT IdxVT = getVectorIdxTy(DAG.getDataLayout()); + unsigned NumElem = StVT.getVectorNumElements(); + + // Extract each of the elements from the original vector and save them into + // memory individually. + SmallVector<SDValue, 8> Stores; + for (unsigned Idx = 0; Idx < NumElem; ++Idx) { + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value, + DAG.getConstant(Idx, SL, IdxVT)); + + SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, + DAG.getConstant(Idx * Stride, SL, PtrVT)); + + // This scalar TruncStore may be illegal, but we legalize it later. 
+ SDValue Store = DAG.getTruncStore( + Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride), + MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride), + ST->getMemOperand()->getFlags(), ST->getAAInfo()); + + Stores.push_back(Store); + } + + return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores); +} + +std::pair<SDValue, SDValue> +TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { + assert(LD->getAddressingMode() == ISD::UNINDEXED && + "unaligned indexed loads not implemented!"); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + EVT VT = LD->getValueType(0); + EVT LoadedVT = LD->getMemoryVT(); + SDLoc dl(LD); + if (VT.isFloatingPoint() || VT.isVector()) { + EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); + if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) { + if (!isOperationLegalOrCustom(ISD::LOAD, intVT)) { + // Scalarize the load and let the individual components be handled. + SDValue Scalarized = scalarizeVectorLoad(LD, DAG); + return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1)); + } + + // Expand to a (misaligned) integer load of the same size, + // then bitconvert to floating point or vector. + SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, + LD->getMemOperand()); + SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); + if (LoadedVT != VT) + Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND : + ISD::ANY_EXTEND, dl, VT, Result); + + return std::make_pair(Result, newLoad.getValue(1)); + } + + // Copy the value to an (aligned) stack slot using (unaligned) integer + // loads and stores, then do an (aligned) load from the stack slot. + MVT RegVT = getRegisterType(*DAG.getContext(), intVT); + unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; + + // Make sure the stack slot is also aligned for the register type. + SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); + + SmallVector<SDValue, 8> Stores; + SDValue StackPtr = StackBase; + unsigned Offset = 0; + + EVT PtrVT = Ptr.getValueType(); + EVT StackPtrVT = StackPtr.getValueType(); + + SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT); + SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT); + + // Do all but one of the copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the original location. + SDValue Load = DAG.getLoad( + RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset), + MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(), + LD->getAAInfo()); + // Follow the load with a store to the stack slot. Remember the store. + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo())); + // Increment the pointers. + Offset += RegBytes; + Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement); + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT, StackPtr, + StackPtrIncrement); + } + + // The last copy may be partial. Do an extending load. + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (LoadedBytes - Offset)); + SDValue Load = + DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(Offset), MemVT, + MinAlign(LD->getAlignment(), Offset), + LD->getMemOperand()->getFlags(), LD->getAAInfo()); + // Follow the load with a store to the stack slot. Remember the store.
+ // On big-endian machines this requires a truncating store to ensure + // that the bits end up in the right place. + Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo(), MemVT)); + + // The order of the stores doesn't matter - say it with a TokenFactor. + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); + + // Finally, perform the original load only redirected to the stack slot. + Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, + MachinePointerInfo(), LoadedVT); + + // Callers expect a MERGE_VALUES node. + return std::make_pair(Load, TF); + } + + assert(LoadedVT.isInteger() && !LoadedVT.isVector() && + "Unaligned load of unsupported type."); + + // Compute the new VT that is half the size of the old one. This is an + // integer MVT. + unsigned NumBits = LoadedVT.getSizeInBits(); + EVT NewLoadedVT; + NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2); + NumBits >>= 1; + + unsigned Alignment = LD->getAlignment(); + unsigned IncrementSize = NumBits / 8; + ISD::LoadExtType HiExtType = LD->getExtensionType(); + + // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD. + if (HiExtType == ISD::NON_EXTLOAD) + HiExtType = ISD::ZEXTLOAD; + + // Load the value in two parts + SDValue Lo, Hi; + if (DAG.getDataLayout().isLittleEndian()) { + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), + NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), + LD->getAAInfo()); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + NewLoadedVT, MinAlign(Alignment, IncrementSize), + LD->getMemOperand()->getFlags(), LD->getAAInfo()); + } else { + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), + NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), + LD->getAAInfo()); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + NewLoadedVT, MinAlign(Alignment, IncrementSize), + LD->getMemOperand()->getFlags(), LD->getAAInfo()); + } + + // aggregate the two parts + SDValue ShiftAmount = + DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(), + DAG.getDataLayout())); + SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount); + Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo); + + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + return std::make_pair(Result, TF); +} + +SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, + SelectionDAG &DAG) const { + assert(ST->getAddressingMode() == ISD::UNINDEXED && + "unaligned indexed stores not implemented!"); + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + SDValue Val = ST->getValue(); + EVT VT = Val.getValueType(); + int Alignment = ST->getAlignment(); + + SDLoc dl(ST); + if (ST->getMemoryVT().isFloatingPoint() || + ST->getMemoryVT().isVector()) { + EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + if (isTypeLegal(intVT)) { + if (!isOperationLegalOrCustom(ISD::STORE, intVT)) { + // Scalarize the store and let the individual components be handled. 
+ SDValue Result = scalarizeVectorStore(ST, DAG); + + return Result; + } + // Expand to a bitconvert of the value to the integer type of the + // same size, then a (misaligned) int store. + // FIXME: Does not handle truncating floating point stores! + SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); + Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), + Alignment, ST->getMemOperand()->getFlags()); + return Result; + } + // Do an (aligned) store to a stack slot, then copy from the stack slot + // to the final destination using (unaligned) integer loads and stores. + EVT StoredVT = ST->getMemoryVT(); + MVT RegVT = + getRegisterType(*DAG.getContext(), + EVT::getIntegerVT(*DAG.getContext(), + StoredVT.getSizeInBits())); + EVT PtrVT = Ptr.getValueType(); + unsigned StoredBytes = StoredVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; + + // Make sure the stack slot is also aligned for the register type. + SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT); + + // Perform the original store, only redirected to the stack slot. + SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr, + MachinePointerInfo(), StoredVT); + + EVT StackPtrVT = StackPtr.getValueType(); + + SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT); + SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT); + SmallVector<SDValue, 8> Stores; + unsigned Offset = 0; + + // Do all but one of the copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the stack slot. + SDValue Load = + DAG.getLoad(RegVT, dl, Store, StackPtr, MachinePointerInfo()); + // Store it to the final location. Remember the store. + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, + ST->getPointerInfo().getWithOffset(Offset), + MinAlign(ST->getAlignment(), Offset), + ST->getMemOperand()->getFlags())); + // Increment the pointers. + Offset += RegBytes; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT, + StackPtr, StackPtrIncrement); + Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement); + } + + // The last store may be partial. Do a truncating store. On big-endian + // machines this requires an extending load from the stack slot to ensure + // that the bits are in the right place. + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (StoredBytes - Offset)); + + // Load from the stack slot. + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, + MachinePointerInfo(), MemVT); + + Stores.push_back( + DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, + ST->getPointerInfo().getWithOffset(Offset), MemVT, + MinAlign(ST->getAlignment(), Offset), + ST->getMemOperand()->getFlags(), ST->getAAInfo())); + // The order of the stores doesn't matter - say it with a TokenFactor. + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); + return Result; + } + + assert(ST->getMemoryVT().isInteger() && + !ST->getMemoryVT().isVector() && + "Unaligned store of unknown type."); + // Get the half-size VT + EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext()); + int NumBits = NewStoredVT.getSizeInBits(); + int IncrementSize = NumBits / 8; + + // Divide the stored value in two parts.
+ SDValue ShiftAmount = + DAG.getConstant(NumBits, dl, getShiftAmountTy(Val.getValueType(), + DAG.getDataLayout())); + SDValue Lo = Val; + SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount); + + // Store the two parts + SDValue Store1, Store2; + Store1 = DAG.getTruncStore(Chain, dl, + DAG.getDataLayout().isLittleEndian() ? Lo : Hi, + Ptr, ST->getPointerInfo(), NewStoredVT, Alignment, + ST->getMemOperand()->getFlags()); + + EVT PtrVT = Ptr.getValueType(); + Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, + DAG.getConstant(IncrementSize, dl, PtrVT)); + Alignment = MinAlign(Alignment, IncrementSize); + Store2 = DAG.getTruncStore( + Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment, + ST->getMemOperand()->getFlags(), ST->getAAInfo()); + + SDValue Result = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + return Result; +} + //===----------------------------------------------------------------------===// // Implementation of Emulated TLS Model //===----------------------------------------------------------------------===// @@ -3057,9 +3526,7 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent()); StringRef EmuTlsVarName(NameString); GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName); - if (!EmuTlsVar) - EmuTlsVar = dyn_cast_or_null<GlobalVariable>( - VariableModule->getOrInsertGlobal(EmuTlsVarName, VoidPtrType)); + assert(EmuTlsVar && "Cannot find EmuTlsVar"); Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT); Entry.Ty = VoidPtrType; Args.push_back(Entry); @@ -3068,7 +3535,7 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()); - CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args), 0); + CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args)); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); // TLSADDR will be codegen'ed as a call. Inform MFI that the function has calls.
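Finally, the expandUnalignedStore tail above splits one unaligned store into two half-width truncating stores whose order depends on endianness. A byte-buffer model of the same split, under stated assumptions (hypothetical helper; each 16-bit half is written in host byte order, so only the split and the half ordering are modeled):

#include <cstddef>
#include <cstdint>
#include <cstring>

// Store Val at Buf + Off without assuming 4-byte alignment: write the two
// 16-bit halves separately, the low half first on little-endian targets,
// mirroring the Lo/Hi selection in Store1/Store2 above.
void storeUnaligned32(std::uint8_t *Buf, std::size_t Off, std::uint32_t Val,
                      bool LittleEndian) {
  std::uint16_t Lo = static_cast<std::uint16_t>(Val);       // low 16 bits
  std::uint16_t Hi = static_cast<std::uint16_t>(Val >> 16); // high 16 bits
  std::uint16_t First = LittleEndian ? Lo : Hi;
  std::uint16_t Second = LittleEndian ? Hi : Lo;
  std::memcpy(Buf + Off, &First, sizeof(First));
  std::memcpy(Buf + Off + sizeof(First), &Second, sizeof(Second));
}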